## Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
plt.rcParams["patch.force_edgecolor"]=True
# Read every sheet of the workbook in one call. With sheet_name=None pandas
# returns a mapping of sheet name -> DataFrame rather than a single frame.
data_dictionary = pd.read_excel(
    io='CREDIT CARD USERS DATA.xlsx',
    sheet_name=None,
)
data_dictionary
OrderedDict([('customer_dbase',
custid region townsize gender age agecat birthmonth ed \
0 3964-QJWTRG-NPN 1 2.0 1 20 2 September 15
1 0648-AIPJSP-UVM 5 5.0 0 22 2 May 17
2 5195-TLUDJE-HVO 3 4.0 1 67 6 June 14
3 4459-VLPQUH-3OL 4 3.0 0 23 2 May 16
4 8158-SMTQFB-CNO 2 2.0 0 26 3 July 16
5 9662-FUSYIM-1IV 4 4.0 0 64 5 August 17
6 7432-QKQFJJ-K72 2 5.0 1 52 5 July 14
7 8959-RZWRHU-ST8 3 4.0 1 44 4 October 16
8 9124-DZALHM-S6I 2 3.0 1 66 6 October 12
9 3512-MUWBGY-52X 2 2.0 0 47 4 July 11
10 5621-QSZPSF-NF2 4 1.0 1 59 5 July 19
11 8241-PWPONH-62O 2 4.0 1 33 3 October 8
12 8795-FYOXCT-P09 5 2.0 0 44 4 March 10
13 1705-NMIQNO-IC4 3 2.0 0 58 5 January 18
14 9205-PAZEXY-90Q 2 1.0 1 72 6 December 20
15 4225-PZZDIY-IBH 3 1.0 1 66 6 December 13
16 0758-EQEGIQ-3OF 1 1.0 1 57 5 October 17
17 0649-TBFJFL-QU4 5 2.0 0 63 5 May 14
18 2228-KOLOPU-FY3 5 5.0 1 28 3 April 11
19 3853-NVDCOJ-TIN 1 1.0 1 78 6 June 16
20 0765-UXAFYM-PDR 3 3.0 0 61 5 June 16
21 9937-SFPLRK-H9Y 2 4.0 0 70 6 September 17
22 0712-WQXYVV-HUP 4 1.0 0 61 5 April 14
23 6441-FJUWZQ-7G8 3 4.0 0 37 4 August 11
24 7634-AVNEXZ-7AG 1 3.0 0 39 4 October 12
25 2041-PNMGHX-TXJ 4 3.0 1 73 6 November 14
26 4626-BQZAUJ-V9K 5 4.0 1 26 3 June 16
27 2969-ODPCDX-5DC 2 1.0 1 24 2 April 17
28 4974-FUBHDF-Z7L 3 1.0 1 77 6 May 15
29 2525-OSULNV-0KS 5 3.0 0 36 4 March 19
... ... ... ... ... ... ... ... ..
4970 1785-IYYYGN-HTR 1 2.0 1 79 6 April 17
4971 6634-HQYWLH-6M4 4 5.0 1 71 6 May 12
4972 6233-HYDJPL-VLO 1 1.0 1 47 4 November 12
4973 5646-BOZIOF-3B6 5 5.0 0 30 3 February 21
4974 0225-LFNXNQ-CBB 5 3.0 1 37 4 December 14
4975 3517-FHPTRR-Q9L 5 4.0 0 28 3 July 15
4976 3625-EIKFES-W21 2 1.0 0 53 5 November 12
4977 5097-BJPHAE-TYU 5 5.0 1 41 4 December 10
4978 8893-JSYRMQ-3VA 4 1.0 0 22 2 November 11
4979 1802-COVRXB-K3B 2 1.0 0 71 6 April 10
4980 0392-XGGPFB-SFH 5 1.0 0 33 3 March 15
4981 0388-KHMXUA-ZCP 1 1.0 0 59 5 November 9
4982 0908-ERJHRU-CFL 3 3.0 1 61 5 January 17
4983 0455-ZMAKZN-6ID 2 2.0 0 58 5 December 15
4984 3282-RFORQB-5Z6 1 3.0 0 30 3 December 17
4985 0997-UKGSUF-SRI 4 2.0 1 30 3 March 20
4986 3219-ODPZKT-U6P 1 1.0 0 22 2 May 12
4987 3883-BTASOR-CD9 5 4.0 1 49 4 May 15
4988 4111-ARMZIV-2MI 2 2.0 0 61 5 February 15
4989 6431-FGVIYD-FCN 4 2.0 0 79 6 October 17
4990 3192-STGTEL-D14 3 3.0 1 26 3 September 21
4991 6841-FBQILD-2CH 2 2.0 1 59 5 November 18
4992 6309-HRNBPZ-565 4 4.0 1 55 5 December 15
4993 8563-YGTRBK-25I 3 3.0 0 56 5 April 18
4994 1973-VJDGJA-TQ6 3 1.0 1 35 4 April 15
4995 3675-GZFGOT-QJN 2 2.0 0 68 6 January 10
4996 4699-LEPCCE-3UD 3 3.0 0 51 5 May 14
4997 8485-LLUICH-CVV 4 5.0 0 75 6 August 17
4998 9325-URAAUT-7FA 1 1.0 0 47 4 December 19
4999 8027-EXDZBF-OGR 3 5.0 1 41 4 May 10
edcat jobcat ... owncd ownpda ownpc ownipod owngame \
0 3 1 ... 0 0 0 1 1
1 4 2 ... 1 1 1 1 1
2 2 2 ... 1 0 0 0 0
3 3 2 ... 1 0 1 1 1
4 3 2 ... 1 0 1 0 1
5 4 3 ... 1 1 0 0 0
6 2 1 ... 1 0 1 1 0
7 3 1 ... 1 0 0 0 0
8 2 1 ... 1 0 0 0 0
9 1 6 ... 1 0 0 0 0
10 4 1 ... 1 0 1 1 0
11 1 2 ... 1 0 0 0 0
12 1 1 ... 1 0 1 0 0
13 4 1 ... 1 0 1 0 0
14 5 4 ... 1 0 1 0 1
15 2 6 ... 1 0 1 0 0
16 4 4 ... 1 1 1 1 1
17 2 6 ... 1 0 0 0 0
18 1 1 ... 1 1 0 0 0
19 3 1 ... 0 0 1 1 0
20 3 1 ... 1 0 1 1 1
21 4 2 ... 1 0 1 0 1
22 2 6 ... 1 0 1 0 1
23 1 1 ... 1 1 1 1 1
24 2 1 ... 1 0 1 1 0
25 2 3 ... 1 1 1 1 1
26 3 2 ... 1 0 1 1 1
27 4 6 ... 1 0 0 1 1
28 3 1 ... 1 0 0 0 0
29 4 1 ... 1 0 1 1 1
... ... ... ... ... ... ... ... ...
4970 4 2 ... 0 0 1 1 1
4971 2 5 ... 1 0 1 0 1
4972 2 2 ... 1 0 1 0 1
4973 5 2 ... 1 1 1 1 1
4974 2 6 ... 1 0 1 1 1
4975 3 3 ... 1 0 1 1 1
4976 2 1 ... 1 0 0 0 0
4977 1 2 ... 0 0 0 1 0
4978 1 6 ... 0 0 0 0 0
4979 1 2 ... 1 0 1 1 0
4980 3 2 ... 1 0 0 1 1
4981 1 1 ... 1 0 0 0 0
4982 4 2 ... 1 0 1 1 1
4983 3 2 ... 1 0 1 0 0
4984 4 1 ... 1 0 0 0 0
4985 5 2 ... 1 1 1 1 0
4986 2 2 ... 0 0 0 1 1
4987 3 2 ... 1 0 1 0 0
4988 3 2 ... 1 0 1 1 0
4989 4 2 ... 1 0 1 1 0
4990 5 5 ... 1 1 1 1 1
4991 4 2 ... 1 0 1 1 1
4992 3 2 ... 1 0 1 0 1
4993 4 2 ... 1 0 1 0 1
4994 3 2 ... 1 0 0 0 1
4995 1 1 ... 1 0 0 0 0
4996 2 1 ... 1 0 0 0 0
4997 4 1 ... 1 1 0 0 0
4998 4 2 ... 1 0 1 1 1
4999 1 5 ... 1 0 0 0 0
ownfax news response_01 response_02 response_03
0 0 0 0 1 0
1 1 1 0 0 0
2 0 1 0 0 0
3 0 1 1 0 0
4 0 0 0 1 0
5 0 0 0 1 0
6 0 0 0 0 0
7 0 1 0 0 0
8 0 0 1 0 0
9 0 0 0 0 0
10 0 0 0 0 0
11 0 0 0 0 0
12 0 0 0 0 0
13 0 1 0 0 0
14 1 1 0 0 0
15 1 1 0 0 1
16 1 0 0 0 1
17 0 1 0 0 0
18 0 0 1 0 0
19 0 0 0 0 0
20 0 0 0 0 0
21 0 0 0 1 0
22 0 0 0 0 0
23 0 1 0 0 0
24 0 0 0 0 0
25 0 1 0 0 0
26 1 0 0 0 0
27 0 0 0 1 0
28 0 1 0 0 0
29 0 0 0 0 0
... ... ... ... ... ...
4970 1 0 0 0 0
4971 0 1 0 0 0
4972 0 1 0 0 0
4973 1 0 0 0 1
4974 1 0 0 0 0
4975 0 0 0 0 0
4976 0 0 0 1 0
4977 0 0 0 1 0
4978 0 0 0 0 0
4979 0 1 0 0 0
4980 1 0 0 0 0
4981 0 1 1 1 0
4982 1 0 0 0 0
4983 0 0 0 0 0
4984 0 1 0 0 0
4985 0 0 0 0 1
4986 0 0 0 0 0
4987 0 1 0 0 0
4988 0 0 0 1 0
4989 0 1 0 1 0
4990 1 0 0 0 0
4991 1 0 0 0 0
4992 1 0 0 0 1
4993 1 0 0 1 0
4994 0 0 0 0 0
4995 0 1 0 0 0
4996 0 0 0 0 0
4997 0 1 0 0 0
4998 0 1 0 0 0
4999 0 0 0 0 0
[5000 rows x 132 columns]),
('Data Dictionary',
Numeric Variable Label Unnamed: 2 \
0 custid Customer ID NaN
1 region Geographic indicator NaN
2 townsize Size of hometown NaN
3 gender Gender NaN
4 age Age in years NaN
5 agecat Age category NaN
6 birthmonth Birth month NaN
7 ed Years of education NaN
8 edcat Level of education NaN
9 jobcat Job category NaN
10 union Union member NaN
11 employ Years with current employer NaN
12 empcat Years with current employer NaN
13 retire Retired NaN
14 income Household income in thousands NaN
15 lninc Log-income NaN
16 inccat Income category in thousands NaN
17 debtinc Debt to income ratio (x100) NaN
18 creddebt Credit card debt in thousands NaN
19 lncreddebt Log-credit card debt NaN
20 othdebt Other debt in thousands NaN
21 lnothdebt Log-Other debt NaN
22 default Ever defaulted on a bank loan NaN
23 jobsat Job satisfaction NaN
24 marital Marital status NaN
25 spoused Spouse years of education NaN
26 spousedcat Spouse level of education NaN
27 reside Number of people in household NaN
28 pets Number of pets owned NaN
29 pets_cats Number of cats owned NaN
.. ... ... ...
413 NaN NaN NaN
414 NaN NaN NaN
415 NaN NaN NaN
416 NaN NaN NaN
417 NaN NaN NaN
418 NaN NaN NaN
419 NaN NaN NaN
420 NaN NaN NaN
421 NaN NaN NaN
422 NaN NaN NaN
423 NaN NaN NaN
424 NaN NaN NaN
425 NaN NaN NaN
426 NaN NaN NaN
427 NaN NaN NaN
428 NaN NaN NaN
429 NaN NaN NaN
430 NaN NaN NaN
431 NaN NaN NaN
432 NaN NaN NaN
433 NaN NaN NaN
434 NaN NaN NaN
435 NaN NaN NaN
436 NaN NaN NaN
437 NaN NaN NaN
438 NaN NaN NaN
439 NaN NaN NaN
440 NaN NaN NaN
441 NaN NaN NaN
442 NaN NaN NaN
Categorical Variable Unnamed: 4 Label.1
0 region 1 Zone 1
1 NaN 2 Zone 2
2 NaN 3 Zone 3
3 NaN 4 Zone 4
4 NaN 5 Zone 5
5 townsize 1 > 250,000
6 NaN 2 50,000-249,999
7 NaN 3 10,000-49,999
8 NaN 4 2,500-9,999
9 NaN 5 < 2,500
10 gender 0 Male
11 NaN 1 Female
12 agecat 1 <18
13 NaN 2 18-24
14 NaN 3 25-34
15 NaN 4 35-49
16 NaN 5 50-64
17 NaN 6 >65
18 NaN 9 No response
19 birthmonth April April
20 NaN August August
21 NaN December December
22 NaN February February
23 NaN January January
24 NaN July July
25 NaN June June
26 NaN March March
27 NaN May May
28 NaN November November
29 NaN October October
.. ... ... ...
413 confer 0 No
414 NaN 1 Yes
415 ebill 0 No
416 NaN 1 Yes
417 owntv 0 No
418 NaN 1 Yes
419 ownvcr 0 No
420 NaN 1 Yes
421 owndvd 0 No
422 NaN 1 Yes
423 owncd 0 No
424 NaN 1 Yes
425 ownpda 0 No
426 NaN 1 Yes
427 ownpc 0 No
428 NaN 1 Yes
429 ownipod 0 No
430 NaN 1 Yes
431 owngame 0 No
432 NaN 1 Yes
433 ownfax 0 No
434 NaN 1 Yes
435 news 0 No
436 NaN 1 Yes
437 response_01 0 No
438 NaN 1 Yes
439 response_02 0 No
440 NaN 1 Yes
441 response_03 0 No
442 NaN 1 Yes
[443 rows x 6 columns]),
('Business Problem',
Unnamed: 0 Unnamed: 1
0 NaN Business Problem
1 NaN Company collected data from 5000 customers. Th...
2 NaN Priotize the drivers based on the importance. )])
# List the sheet names that were loaded from the workbook.
data_dictionary.keys()
odict_keys(['customer_dbase', 'Data Dictionary', 'Business Problem'])
# Show the 'Business Problem' sheet as a DataFrame.
data_dictionary['Business Problem']
| Unnamed: 0 | Unnamed: 1 | |
|---|---|---|
| 0 | NaN | Business Problem |
| 1 | NaN | Company collected data from 5000 customers. Th... |
| 2 | NaN | Priotize the drivers based on the importance. |
# Pull the full (untruncated) problem statement: row label 1 of the
# 'Unnamed: 1' column in the 'Business Problem' sheet.
data_dictionary['Business Problem'].loc[1, 'Unnamed: 1']
"Company collected data from 5000 customers. The objective of this case study is to understand what's driving the total spend of credit card(Primary Card + Secondary card)"
# Show the 'Data Dictionary' sheet (variable names, labels and category codes).
data_dictionary['Data Dictionary']
| Numeric Variable | Label | Unnamed: 2 | Categorical Variable | Unnamed: 4 | Label.1 | |
|---|---|---|---|---|---|---|
| 0 | custid | Customer ID | NaN | region | 1 | Zone 1 |
| 1 | region | Geographic indicator | NaN | NaN | 2 | Zone 2 |
| 2 | townsize | Size of hometown | NaN | NaN | 3 | Zone 3 |
| 3 | gender | Gender | NaN | NaN | 4 | Zone 4 |
| 4 | age | Age in years | NaN | NaN | 5 | Zone 5 |
| 5 | agecat | Age category | NaN | townsize | 1 | > 250,000 |
| 6 | birthmonth | Birth month | NaN | NaN | 2 | 50,000-249,999 |
| 7 | ed | Years of education | NaN | NaN | 3 | 10,000-49,999 |
| 8 | edcat | Level of education | NaN | NaN | 4 | 2,500-9,999 |
| 9 | jobcat | Job category | NaN | NaN | 5 | < 2,500 |
| 10 | union | Union member | NaN | gender | 0 | Male |
| 11 | employ | Years with current employer | NaN | NaN | 1 | Female |
| 12 | empcat | Years with current employer | NaN | agecat | 1 | <18 |
| 13 | retire | Retired | NaN | NaN | 2 | 18-24 |
| 14 | income | Household income in thousands | NaN | NaN | 3 | 25-34 |
| 15 | lninc | Log-income | NaN | NaN | 4 | 35-49 |
| 16 | inccat | Income category in thousands | NaN | NaN | 5 | 50-64 |
| 17 | debtinc | Debt to income ratio (x100) | NaN | NaN | 6 | >65 |
| 18 | creddebt | Credit card debt in thousands | NaN | NaN | 9 | No response |
| 19 | lncreddebt | Log-credit card debt | NaN | birthmonth | April | April |
| 20 | othdebt | Other debt in thousands | NaN | NaN | August | August |
| 21 | lnothdebt | Log-Other debt | NaN | NaN | December | December |
| 22 | default | Ever defaulted on a bank loan | NaN | NaN | February | February |
| 23 | jobsat | Job satisfaction | NaN | NaN | January | January |
| 24 | marital | Marital status | NaN | NaN | July | July |
| 25 | spoused | Spouse years of education | NaN | NaN | June | June |
| 26 | spousedcat | Spouse level of education | NaN | NaN | March | March |
| 27 | reside | Number of people in household | NaN | NaN | May | May |
| 28 | pets | Number of pets owned | NaN | NaN | November | November |
| 29 | pets_cats | Number of cats owned | NaN | NaN | October | October |
| ... | ... | ... | ... | ... | ... | ... |
| 413 | NaN | NaN | NaN | confer | 0 | No |
| 414 | NaN | NaN | NaN | NaN | 1 | Yes |
| 415 | NaN | NaN | NaN | ebill | 0 | No |
| 416 | NaN | NaN | NaN | NaN | 1 | Yes |
| 417 | NaN | NaN | NaN | owntv | 0 | No |
| 418 | NaN | NaN | NaN | NaN | 1 | Yes |
| 419 | NaN | NaN | NaN | ownvcr | 0 | No |
| 420 | NaN | NaN | NaN | NaN | 1 | Yes |
| 421 | NaN | NaN | NaN | owndvd | 0 | No |
| 422 | NaN | NaN | NaN | NaN | 1 | Yes |
| 423 | NaN | NaN | NaN | owncd | 0 | No |
| 424 | NaN | NaN | NaN | NaN | 1 | Yes |
| 425 | NaN | NaN | NaN | ownpda | 0 | No |
| 426 | NaN | NaN | NaN | NaN | 1 | Yes |
| 427 | NaN | NaN | NaN | ownpc | 0 | No |
| 428 | NaN | NaN | NaN | NaN | 1 | Yes |
| 429 | NaN | NaN | NaN | ownipod | 0 | No |
| 430 | NaN | NaN | NaN | NaN | 1 | Yes |
| 431 | NaN | NaN | NaN | owngame | 0 | No |
| 432 | NaN | NaN | NaN | NaN | 1 | Yes |
| 433 | NaN | NaN | NaN | ownfax | 0 | No |
| 434 | NaN | NaN | NaN | NaN | 1 | Yes |
| 435 | NaN | NaN | NaN | news | 0 | No |
| 436 | NaN | NaN | NaN | NaN | 1 | Yes |
| 437 | NaN | NaN | NaN | response_01 | 0 | No |
| 438 | NaN | NaN | NaN | NaN | 1 | Yes |
| 439 | NaN | NaN | NaN | response_02 | 0 | No |
| 440 | NaN | NaN | NaN | NaN | 1 | Yes |
| 441 | NaN | NaN | NaN | response_03 | 0 | No |
| 442 | NaN | NaN | NaN | NaN | 1 | Yes |
443 rows × 6 columns
# Build a lookup of variable name -> human-readable label from the
# 'Data Dictionary' sheet.
# The previous version zipped the 'Numeric Variable' column with itself, which
# only produced an identity mapping ({'custid': 'custid', ...}) plus a spurious
# `nan: nan` entry coming from the blank padding rows of the sheet.
num_var = (
    data_dictionary['Data Dictionary']
    .dropna(subset=['Numeric Variable'])   # skip rows with no variable name
    .set_index('Numeric Variable')['Label']
    .to_dict()
)
num_var
{'custid': 'custid',
'region': 'region',
'townsize': 'townsize',
'gender': 'gender',
'age': 'age',
'agecat': 'agecat',
'birthmonth': 'birthmonth',
'ed': 'ed',
'edcat': 'edcat',
'jobcat': 'jobcat',
'union': 'union',
'employ': 'employ',
'empcat': 'empcat',
'retire': 'retire',
'income': 'income',
'lninc': 'lninc',
'inccat': 'inccat',
'debtinc': 'debtinc',
'creddebt': 'creddebt',
'lncreddebt': 'lncreddebt',
'othdebt': 'othdebt',
'lnothdebt': 'lnothdebt',
'default': 'default',
'jobsat': 'jobsat',
'marital': 'marital',
'spoused': 'spoused',
'spousedcat': 'spousedcat',
'reside': 'reside',
'pets': 'pets',
'pets_cats': 'pets_cats',
'pets_dogs': 'pets_dogs',
'pets_birds': 'pets_birds',
'pets_reptiles': 'pets_reptiles',
'pets_small': 'pets_small',
'pets_saltfish': 'pets_saltfish',
'pets_freshfish': 'pets_freshfish',
'homeown': 'homeown',
'hometype': 'hometype',
'address': 'address',
'addresscat': 'addresscat',
'cars': 'cars',
'carown': 'carown',
'cartype': 'cartype',
'carvalue': 'carvalue',
'carcatvalue': 'carcatvalue',
'carbought': 'carbought',
'carbuy': 'carbuy',
'commute': 'commute',
'commutecat': 'commutecat',
'commutetime': 'commutetime',
'commutecar': 'commutecar',
'commutemotorcycle': 'commutemotorcycle',
'commutecarpool': 'commutecarpool',
'commutebus': 'commutebus',
'commuterail': 'commuterail',
'commutepublic': 'commutepublic',
'commutebike': 'commutebike',
'commutewalk': 'commutewalk',
'commutenonmotor': 'commutenonmotor',
'telecommute': 'telecommute',
'reason': 'reason',
'polview': 'polview',
'polparty': 'polparty',
'polcontrib': 'polcontrib',
'vote': 'vote',
'card': 'card',
'cardtype': 'cardtype',
'cardbenefit': 'cardbenefit',
'cardfee': 'cardfee',
'cardtenure': 'cardtenure',
'cardtenurecat': 'cardtenurecat',
'card2': 'card2',
'card2type': 'card2type',
'card2benefit': 'card2benefit',
'card2fee': 'card2fee',
'card2tenure': 'card2tenure',
'card2tenurecat': 'card2tenurecat',
'carditems': 'carditems',
'cardspent': 'cardspent',
'card2items': 'card2items',
'card2spent': 'card2spent',
'active': 'active',
'bfast': 'bfast',
'tenure': 'tenure',
'churn': 'churn',
'longmon': 'longmon',
'lnlongmon': 'lnlongmon',
'longten': 'longten',
'lnlongten': 'lnlongten',
'tollfree': 'tollfree',
'tollmon': 'tollmon',
'lntollmon': 'lntollmon',
'tollten': 'tollten',
'lntollten': 'lntollten',
'equip': 'equip',
'equipmon': 'equipmon',
'lnequipmon': 'lnequipmon',
'equipten': 'equipten',
'lnequipten': 'lnequipten',
'callcard': 'callcard',
'cardmon': 'cardmon',
'lncardmon': 'lncardmon',
'cardten': 'cardten',
'lncardten': 'lncardten',
'wireless': 'wireless',
'wiremon': 'wiremon',
'lnwiremon': 'lnwiremon',
'wireten': 'wireten',
'lnwireten': 'lnwireten',
'multline': 'multline',
'voice': 'voice',
'pager': 'pager',
'internet': 'internet',
'callid': 'callid',
'callwait': 'callwait',
'forward': 'forward',
'confer': 'confer',
'ebill': 'ebill',
'owntv': 'owntv',
'hourstv': 'hourstv',
'ownvcr': 'ownvcr',
'owndvd': 'owndvd',
'owncd': 'owncd',
'ownpda': 'ownpda',
'ownpc': 'ownpc',
'ownipod': 'ownipod',
'owngame': 'owngame',
'ownfax': 'ownfax',
'news': 'news',
'response_01': 'response_01',
'response_02': 'response_02',
'response_03': 'response_03',
nan: nan}
# Work with the 'customer_dbase' sheet as the analysis dataset and preview
# its first rows.
dataset=data_dictionary['customer_dbase']
dataset.head()
| custid | region | townsize | gender | age | agecat | birthmonth | ed | edcat | jobcat | ... | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3964-QJWTRG-NPN | 1 | 2.0 | 1 | 20 | 2 | September | 15 | 3 | 1 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0648-AIPJSP-UVM | 5 | 5.0 | 0 | 22 | 2 | May | 17 | 4 | 2 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
| 2 | 5195-TLUDJE-HVO | 3 | 4.0 | 1 | 67 | 6 | June | 14 | 2 | 2 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 3 | 4459-VLPQUH-3OL | 4 | 3.0 | 0 | 23 | 2 | May | 16 | 3 | 2 | ... | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 |
| 4 | 8158-SMTQFB-CNO | 2 | 2.0 | 0 | 26 | 3 | July | 16 | 3 | 2 | ... | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
5 rows × 132 columns
# Discard the 'custid' and 'birthmonth' columns before summarising.
dataset = dataset.drop(columns=['custid', 'birthmonth'])
def var_summary(x):
    """Return a descriptive-statistics profile of one numeric column.

    Parameters
    ----------
    x : pandas.Series
        Numeric column to profile; it may contain missing values.

    Returns
    -------
    pandas.Series
        29 labelled entries: missing-value percentage and count, dtype,
        number of distinct non-null values, length, non-null count, sum,
        mean, median, std, variance, min/max, selected percentiles, IQR,
        the mean +/- 3*std limits, the 1.5*IQR fences ('lowerB'/'upperB'),
        and two flags telling whether min/max fall outside the 3-sigma
        limits ('outlier 1') or outside the IQR fences ('outlier 2').
    """
    # Drop missing values once; every location/spread statistic below uses
    # the non-null observations only.
    observed = x.dropna()
    n_missing = x.isnull().sum()

    mean = observed.mean()
    std = observed.std()
    p01, p05, p10, p25, p50, p75, p90, p95, p99 = [
        observed.quantile(q)
        for q in (0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99)
    ]
    smallest = x.min()
    largest = x.max()

    iqr = p75 - p25
    upper_3std = mean + 3 * std
    lower_3std = mean - 3 * std
    lower_fence = p25 - (1.5 * iqr)
    upper_fence = p75 + (1.5 * iqr)

    outside_3std = (largest > upper_3std) | (smallest < lower_3std)
    outside_fences = (largest > upper_fence) | (smallest < lower_fence)

    values = [
        (n_missing / len(x)) * 100, x.dtype, len(observed.unique()), len(x),
        x.count(), n_missing, x.sum(), mean, observed.median(), std,
        observed.var(), smallest, p01, p05, p10, p25, p50, p75, p90, p95,
        p99, largest, iqr, upper_3std, lower_3std, lower_fence, upper_fence,
        outside_3std, outside_fences,
    ]
    # Labels are kept exactly as before so downstream column lookups still
    # work. Note that 'P10(Q1)' holds the 10th percentile; the first
    # quartile is the 'P25' entry.
    labels = ['NMISS%', 'D-type', 'unique', 'Length', 'N', 'NMISS', 'SUM', 'MEAN','MEDIAN', 'STD', 'VAR', 'MIN', 'P1' , 'P5' ,'P10(Q1)' ,'P25' ,'P50(Q2)', 'P75(Q3)' ,'P90' ,'P95' ,'P99' ,'MAX', 'IQR', '+3std', '-3std', 'lowerB', 'upperB', 'outlier 1', 'outlier 2']
    return pd.Series(values, index=labels)
# Profile every column with var_summary; transpose so each row is one variable.
dataset.apply(var_summary).T
| NMISS% | D-type | unique | Length | N | NMISS | SUM | MEAN | MEDIAN | STD | ... | P95 | P99 | MAX | IQR | +3std | -3std | lowerB | upperB | outlier 1 | outlier 2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| region | 0 | int64 | 5 | 5000 | 5000 | 0 | 15007 | 3.0014 | 3 | 1.42176 | ... | 5 | 5 | 5 | 2 | 7.26668 | -1.26388 | -1 | 7 | False | False |
| townsize | 0.04 | float64 | 5 | 5000 | 4998 | 2 | 13431 | 2.68727 | 3 | 1.42592 | ... | 5 | 5 | 5 | 3 | 6.96505 | -1.5905 | -3.5 | 8.5 | False | False |
| gender | 0 | int64 | 2 | 5000 | 5000 | 0 | 2518 | 0.5036 | 1 | 0.500037 | ... | 1 | 1 | 1 | 1 | 2.00371 | -0.996511 | -1.5 | 2.5 | False | False |
| age | 0 | int64 | 62 | 5000 | 5000 | 0 | 235128 | 47.0256 | 47 | 17.7703 | ... | 76 | 79 | 79 | 31 | 100.337 | -6.28541 | -15.5 | 108.5 | False | False |
| agecat | 0 | int64 | 5 | 5000 | 5000 | 0 | 21194 | 4.2388 | 4 | 1.30878 | ... | 6 | 6 | 6 | 2 | 8.16515 | 0.312446 | 0 | 8 | False | False |
| ed | 0 | int64 | 18 | 5000 | 5000 | 0 | 72715 | 14.543 | 14 | 3.28108 | ... | 20 | 21 | 23 | 5 | 24.3862 | 4.69975 | 4.5 | 24.5 | False | False |
| edcat | 0 | int64 | 5 | 5000 | 5000 | 0 | 13360 | 2.672 | 2 | 1.21174 | ... | 5 | 5 | 5 | 2 | 6.30721 | -0.963215 | -1 | 7 | False | False |
| jobcat | 0 | int64 | 6 | 5000 | 5000 | 0 | 13764 | 2.7528 | 2 | 1.7379 | ... | 6 | 6 | 6 | 3 | 7.9665 | -2.4609 | -3.5 | 8.5 | False | False |
| union | 0 | int64 | 2 | 5000 | 5000 | 0 | 756 | 0.1512 | 0 | 0.35828 | ... | 1 | 1 | 1 | 0 | 1.22604 | -0.923639 | 0 | 0 | False | True |
| employ | 0 | int64 | 52 | 5000 | 5000 | 0 | 48652 | 9.7304 | 7 | 9.69093 | ... | 31 | 39 | 52 | 13 | 38.8032 | -19.3424 | -17.5 | 34.5 | True | True |
| empcat | 0 | int64 | 5 | 5000 | 5000 | 0 | 14663 | 2.9326 | 3 | 1.4533 | ... | 5 | 5 | 5 | 2 | 7.2925 | -1.4273 | -1 | 7 | False | False |
| retire | 0 | int64 | 2 | 5000 | 5000 | 0 | 738 | 0.1476 | 0 | 0.354739 | ... | 1 | 1 | 1 | 0 | 1.21182 | -0.916616 | 0 | 0 | False | True |
| income | 0 | int64 | 266 | 5000 | 5000 | 0 | 273798 | 54.7596 | 38 | 55.3775 | ... | 147 | 272.01 | 1073 | 43 | 220.892 | -111.373 | -40.5 | 131.5 | True | True |
| lninc | 0 | float64 | 266 | 5000 | 5000 | 0 | 18499.5 | 3.69991 | 3.63759 | 0.747072 | ... | 4.99043 | 5.60584 | 6.97821 | 1.02664 | 5.94113 | 1.45869 | 1.6381 | 5.74465 | True | True |
| inccat | 0 | int64 | 5 | 5000 | 5000 | 0 | 11961 | 2.3922 | 2 | 1.22126 | ... | 5 | 5 | 5 | 2 | 6.05598 | -1.27158 | -2 | 6 | False | False |
| debtinc | 0 | float64 | 325 | 5000 | 5000 | 0 | 49770.8 | 9.95416 | 8.8 | 6.39978 | ... | 22.2 | 29.2 | 43.1 | 8.5 | 29.1535 | -9.24519 | -7.65 | 26.35 | True | True |
| creddebt | 0 | float64 | 4950 | 5000 | 5000 | 0 | 9286.63 | 1.85733 | 0.926437 | 3.41573 | ... | 6.37301 | 14.2804 | 109.073 | 1.6783 | 12.1045 | -8.38987 | -2.13193 | 4.58127 | True | True |
| lncreddebt | 0.02 | float64 | 4941 | 5000 | 4999 | 1 | -652.137 | -0.130454 | -0.076106 | 1.27306 | ... | 1.8523 | 2.65891 | 4.69201 | 1.67735 | 3.68872 | -3.94963 | -3.46871 | 3.24069 | True | True |
| othdebt | 0 | float64 | 4973 | 5000 | 5000 | 0 | 18272.3 | 3.65446 | 2.09854 | 5.39517 | ... | 11.816 | 24.0643 | 141.459 | 3.33448 | 19.84 | -12.5311 | -4.02142 | 9.3165 | True | True |
| lnothdebt | 0.02 | float64 | 4972 | 5000 | 4999 | 1 | 3483.88 | 0.696915 | 0.741537 | 1.12858 | ... | 2.46959 | 3.1808 | 4.95201 | 1.48104 | 4.08265 | -2.68882 | -2.24055 | 3.68361 | True | True |
| default | 0 | int64 | 2 | 5000 | 5000 | 0 | 1171 | 0.2342 | 0 | 0.42354 | ... | 1 | 1 | 1 | 0 | 1.50482 | -1.03642 | 0 | 0 | False | True |
| jobsat | 0 | int64 | 5 | 5000 | 5000 | 0 | 14821 | 2.9642 | 3 | 1.37946 | ... | 5 | 5 | 5 | 2 | 7.10257 | -1.17417 | -1 | 7 | False | False |
| marital | 0 | int64 | 2 | 5000 | 5000 | 0 | 2401 | 0.4802 | 0 | 0.499658 | ... | 1 | 1 | 1 | 1 | 1.97917 | -1.01877 | -1.5 | 2.5 | False | False |
| spoused | 0 | int64 | 22 | 5000 | 5000 | 0 | 30564 | 6.1128 | -1 | 7.74352 | ... | 18 | 20 | 24 | 15 | 29.3434 | -17.1178 | -23.5 | 36.5 | False | False |
| spousedcat | 0 | int64 | 6 | 5000 | 5000 | 0 | 3207 | 0.6414 | -1 | 1.88677 | ... | 4 | 5 | 5 | 3 | 6.30172 | -5.01892 | -5.5 | 6.5 | False | False |
| reside | 0 | int64 | 9 | 5000 | 5000 | 0 | 11020 | 2.204 | 2 | 1.39398 | ... | 5 | 6 | 9 | 2 | 6.38593 | -1.97793 | -2 | 6 | True | True |
| pets | 0 | int64 | 20 | 5000 | 5000 | 0 | 15337 | 3.0674 | 2 | 3.4145 | ... | 10 | 13 | 21 | 5 | 13.3109 | -7.17609 | -7.5 | 12.5 | True | True |
| pets_cats | 0 | int64 | 7 | 5000 | 5000 | 0 | 2502 | 0.5004 | 0 | 0.860783 | ... | 2 | 3 | 6 | 1 | 3.08275 | -2.08195 | -1.5 | 2.5 | True | True |
| pets_dogs | 0 | int64 | 7 | 5000 | 5000 | 0 | 1962 | 0.3924 | 0 | 0.796084 | ... | 2 | 3 | 7 | 0 | 2.78065 | -1.99585 | 0 | 0 | True | True |
| pets_birds | 0 | int64 | 6 | 5000 | 5000 | 0 | 552 | 0.1104 | 0 | 0.494227 | ... | 1 | 3 | 5 | 0 | 1.59308 | -1.37228 | 0 | 0 | True | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| cardten | 0.04 | float64 | 697 | 5000 | 4998 | 2 | 3.60095e+06 | 720.478 | 425 | 922.226 | ... | 2455.75 | 4011.2 | 13705 | 1080 | 3487.15 | -2046.2 | -1620 | 2700 | True | True |
| lncardten | 28.44 | float64 | 696 | 5000 | 3578 | 1422 | 22993.3 | 6.42631 | 6.63988 | 1.17205 | ... | 7.92326 | 8.39215 | 9.52552 | 1.36098 | 9.94246 | 2.91016 | 3.81647 | 9.26037 | True | True |
| wireless | 0 | int64 | 2 | 5000 | 5000 | 0 | 1344 | 0.2688 | 0 | 0.44338 | ... | 1 | 1 | 1 | 1 | 1.59894 | -1.06134 | -1.5 | 2.5 | False | False |
| wiremon | 0 | float64 | 746 | 5000 | 5000 | 0 | 53505.9 | 10.7012 | 0 | 19.7998 | ... | 51.305 | 78.304 | 186.25 | 20.9625 | 70.1007 | -48.6983 | -31.4437 | 52.4062 | True | True |
| lnwiremon | 73.12 | float64 | 745 | 5000 | 1344 | 3656 | 4845.12 | 3.605 | 3.598 | 0.390102 | ... | 4.26728 | 4.57719 | 5.22709 | 0.534776 | 4.77531 | 2.4347 | 2.52825 | 4.66736 | True | True |
| wireten | 0 | float64 | 1328 | 5000 | 5000 | 0 | 2.10992e+06 | 421.985 | 0 | 1001 | ... | 2687.92 | 4530.19 | 12858.6 | 89.9625 | 3424.99 | -2581.03 | -134.944 | 224.906 | True | True |
| lnwireten | 73.12 | float64 | 1327 | 5000 | 1344 | 3656 | 9150.13 | 6.80813 | 7.14719 | 1.28397 | ... | 8.31082 | 8.69012 | 9.46177 | 1.59729 | 10.66 | 2.95623 | 3.76216 | 10.1513 | True | True |
| multline | 0 | int64 | 2 | 5000 | 5000 | 0 | 2442 | 0.4884 | 0 | 0.499915 | ... | 1 | 1 | 1 | 1 | 1.98815 | -1.01135 | -1.5 | 2.5 | False | False |
| voice | 0 | int64 | 2 | 5000 | 5000 | 0 | 1515 | 0.303 | 0 | 0.459601 | ... | 1 | 1 | 1 | 1 | 1.6818 | -1.0758 | -1.5 | 2.5 | False | False |
| pager | 0 | int64 | 2 | 5000 | 5000 | 0 | 1218 | 0.2436 | 0 | 0.429297 | ... | 1 | 1 | 1 | 0 | 1.53149 | -1.04429 | 0 | 0 | False | True |
| internet | 0 | int64 | 5 | 5000 | 5000 | 0 | 5998 | 1.1996 | 1 | 1.44934 | ... | 4 | 4 | 4 | 2 | 5.54761 | -3.14841 | -3 | 5 | False | False |
| callid | 0 | int64 | 2 | 5000 | 5000 | 0 | 2376 | 0.4752 | 0 | 0.499435 | ... | 1 | 1 | 1 | 1 | 1.9735 | -1.0231 | -1.5 | 2.5 | False | False |
| callwait | 0 | int64 | 2 | 5000 | 5000 | 0 | 2395 | 0.479 | 0 | 0.499609 | ... | 1 | 1 | 1 | 1 | 1.97783 | -1.01983 | -1.5 | 2.5 | False | False |
| forward | 0 | int64 | 2 | 5000 | 5000 | 0 | 2403 | 0.4806 | 0 | 0.499673 | ... | 1 | 1 | 1 | 1 | 1.97962 | -1.01842 | -1.5 | 2.5 | False | False |
| confer | 0 | int64 | 2 | 5000 | 5000 | 0 | 2390 | 0.478 | 0 | 0.499566 | ... | 1 | 1 | 1 | 1 | 1.9767 | -1.0207 | -1.5 | 2.5 | False | False |
| ebill | 0 | int64 | 2 | 5000 | 5000 | 0 | 1743 | 0.3486 | 0 | 0.476575 | ... | 1 | 1 | 1 | 1 | 1.77832 | -1.08112 | -1.5 | 2.5 | False | False |
| owntv | 0 | int64 | 2 | 5000 | 5000 | 0 | 4915 | 0.983 | 1 | 0.129284 | ... | 1 | 1 | 1 | 0 | 1.37085 | 0.595148 | 1 | 1 | True | True |
| hourstv | 0 | int64 | 32 | 5000 | 5000 | 0 | 98225 | 19.645 | 20 | 5.16561 | ... | 28 | 31 | 36 | 6 | 35.1418 | 4.14817 | 8 | 32 | True | True |
| ownvcr | 0 | int64 | 2 | 5000 | 5000 | 0 | 4578 | 0.9156 | 1 | 0.278015 | ... | 1 | 1 | 1 | 0 | 1.74964 | 0.0815563 | 1 | 1 | True | True |
| owndvd | 0 | int64 | 2 | 5000 | 5000 | 0 | 4568 | 0.9136 | 1 | 0.280982 | ... | 1 | 1 | 1 | 0 | 1.75655 | 0.0706543 | 1 | 1 | True | True |
| owncd | 0 | int64 | 2 | 5000 | 5000 | 0 | 4664 | 0.9328 | 1 | 0.250393 | ... | 1 | 1 | 1 | 0 | 1.68398 | 0.181621 | 1 | 1 | True | True |
| ownpda | 0 | int64 | 2 | 5000 | 5000 | 0 | 1005 | 0.201 | 0 | 0.400788 | ... | 1 | 1 | 1 | 0 | 1.40336 | -1.00136 | 0 | 0 | False | True |
| ownpc | 0 | int64 | 2 | 5000 | 5000 | 0 | 3164 | 0.6328 | 1 | 0.48209 | ... | 1 | 1 | 1 | 1 | 2.07907 | -0.81347 | -1.5 | 2.5 | False | False |
| ownipod | 0 | int64 | 2 | 5000 | 5000 | 0 | 2396 | 0.4792 | 0 | 0.499617 | ... | 1 | 1 | 1 | 1 | 1.97805 | -1.01965 | -1.5 | 2.5 | False | False |
| owngame | 0 | int64 | 2 | 5000 | 5000 | 0 | 2374 | 0.4748 | 0 | 0.499415 | ... | 1 | 1 | 1 | 1 | 1.97304 | -1.02344 | -1.5 | 2.5 | False | False |
| ownfax | 0 | int64 | 2 | 5000 | 5000 | 0 | 894 | 0.1788 | 0 | 0.383223 | ... | 1 | 1 | 1 | 0 | 1.32847 | -0.970869 | 0 | 0 | False | True |
| news | 0 | int64 | 2 | 5000 | 5000 | 0 | 2363 | 0.4726 | 0 | 0.499299 | ... | 1 | 1 | 1 | 1 | 1.9705 | -1.0253 | -1.5 | 2.5 | False | False |
| response_01 | 0 | int64 | 2 | 5000 | 5000 | 0 | 418 | 0.0836 | 0 | 0.276815 | ... | 1 | 1 | 1 | 0 | 0.914044 | -0.746844 | 0 | 0 | True | True |
| response_02 | 0 | int64 | 2 | 5000 | 5000 | 0 | 649 | 0.1298 | 0 | 0.336117 | ... | 1 | 1 | 1 | 0 | 1.13815 | -0.878551 | 0 | 0 | False | True |
| response_03 | 0 | int64 | 2 | 5000 | 5000 | 0 | 513 | 0.1026 | 0 | 0.303466 | ... | 1 | 1 | 1 | 0 | 1.013 | -0.807798 | 0 | 0 | False | True |
130 rows × 29 columns
# Count missing values per column and show only the columns that have any.
missing_counts = dataset.isnull().sum()
missing_counts[missing_counts > 0]
townsize 2 lncreddebt 1 lnothdebt 1 commutetime 2 longten 3 lnlongten 3 lntollmon 2622 lntollten 2622 lnequipmon 3296 lnequipten 3296 lncardmon 1419 cardten 2 lncardten 1422 lnwiremon 3656 lnwireten 3656 dtype: int64
# Impute the handful of missing values in the raw columns.
# The filled Series is assigned back to the frame rather than calling
# fillna(inplace=True) on an attribute-accessed column: that form is a chained
# assignment, which newer pandas warns about and which leaves the frame
# unchanged when Copy-on-Write is enabled.
dataset['commutetime'] = dataset['commutetime'].fillna(dataset['commutetime'].mean())
# Missing townsize values are taken from the row's region code, but only for
# regions 1 and 5; the map yields NaN for any other region, so such rows
# would stay unfilled.
di = {1: 1, 5: 5}
dataset['townsize'] = dataset['townsize'].fillna(dataset['region'].map(di))
dataset['cardten'] = dataset['cardten'].fillna(dataset['cardten'].mean())
dataset['longten'] = dataset['longten'].fillna(dataset['longten'].mean())
# Rebuild each log column as log(value + 1) of its raw counterpart, overwriting
# the versions that contained missing values.
log_source_columns = (
    'creddebt', 'othdebt', 'longten',
    'tollmon', 'equipmon', 'tollten', 'equipten',
    'cardmon', 'cardten', 'wiremon', 'wireten',
)
for raw_name in log_source_columns:
    dataset['ln' + raw_name] = np.log(dataset[raw_name] + 1)

# Re-check which columns (if any) still contain missing values.
remaining_missing = dataset.isnull().sum()
remaining_missing[remaining_missing > 0]
Series([], dtype: int64)
# Total number of missing cells across the whole frame.
dataset.isna().sum().sum()
0
# Re-run the per-column profile now that missing values have been handled.
dataset.apply(var_summary).T
| NMISS% | D-type | unique | Length | N | NMISS | SUM | MEAN | MEDIAN | STD | ... | P95 | P99 | MAX | IQR | +3std | -3std | lowerB | upperB | outlier 1 | outlier 2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| region | 0 | int64 | 5 | 5000 | 5000 | 0 | 15007 | 3.0014 | 3 | 1.42176 | ... | 5 | 5 | 5 | 2 | 7.26668 | -1.26388 | -1 | 7 | False | False |
| townsize | 0 | float64 | 5 | 5000 | 5000 | 0 | 13437 | 2.6874 | 3 | 1.42621 | ... | 5 | 5 | 5 | 3 | 6.96604 | -1.59124 | -3.5 | 8.5 | False | False |
| gender | 0 | int64 | 2 | 5000 | 5000 | 0 | 2518 | 0.5036 | 1 | 0.500037 | ... | 1 | 1 | 1 | 1 | 2.00371 | -0.996511 | -1.5 | 2.5 | False | False |
| age | 0 | int64 | 62 | 5000 | 5000 | 0 | 235128 | 47.0256 | 47 | 17.7703 | ... | 76 | 79 | 79 | 31 | 100.337 | -6.28541 | -15.5 | 108.5 | False | False |
| agecat | 0 | int64 | 5 | 5000 | 5000 | 0 | 21194 | 4.2388 | 4 | 1.30878 | ... | 6 | 6 | 6 | 2 | 8.16515 | 0.312446 | 0 | 8 | False | False |
| ed | 0 | int64 | 18 | 5000 | 5000 | 0 | 72715 | 14.543 | 14 | 3.28108 | ... | 20 | 21 | 23 | 5 | 24.3862 | 4.69975 | 4.5 | 24.5 | False | False |
| edcat | 0 | int64 | 5 | 5000 | 5000 | 0 | 13360 | 2.672 | 2 | 1.21174 | ... | 5 | 5 | 5 | 2 | 6.30721 | -0.963215 | -1 | 7 | False | False |
| jobcat | 0 | int64 | 6 | 5000 | 5000 | 0 | 13764 | 2.7528 | 2 | 1.7379 | ... | 6 | 6 | 6 | 3 | 7.9665 | -2.4609 | -3.5 | 8.5 | False | False |
| union | 0 | int64 | 2 | 5000 | 5000 | 0 | 756 | 0.1512 | 0 | 0.35828 | ... | 1 | 1 | 1 | 0 | 1.22604 | -0.923639 | 0 | 0 | False | True |
| employ | 0 | int64 | 52 | 5000 | 5000 | 0 | 48652 | 9.7304 | 7 | 9.69093 | ... | 31 | 39 | 52 | 13 | 38.8032 | -19.3424 | -17.5 | 34.5 | True | True |
| empcat | 0 | int64 | 5 | 5000 | 5000 | 0 | 14663 | 2.9326 | 3 | 1.4533 | ... | 5 | 5 | 5 | 2 | 7.2925 | -1.4273 | -1 | 7 | False | False |
| retire | 0 | int64 | 2 | 5000 | 5000 | 0 | 738 | 0.1476 | 0 | 0.354739 | ... | 1 | 1 | 1 | 0 | 1.21182 | -0.916616 | 0 | 0 | False | True |
| income | 0 | int64 | 266 | 5000 | 5000 | 0 | 273798 | 54.7596 | 38 | 55.3775 | ... | 147 | 272.01 | 1073 | 43 | 220.892 | -111.373 | -40.5 | 131.5 | True | True |
| lninc | 0 | float64 | 266 | 5000 | 5000 | 0 | 18499.5 | 3.69991 | 3.63759 | 0.747072 | ... | 4.99043 | 5.60584 | 6.97821 | 1.02664 | 5.94113 | 1.45869 | 1.6381 | 5.74465 | True | True |
| inccat | 0 | int64 | 5 | 5000 | 5000 | 0 | 11961 | 2.3922 | 2 | 1.22126 | ... | 5 | 5 | 5 | 2 | 6.05598 | -1.27158 | -2 | 6 | False | False |
| debtinc | 0 | float64 | 325 | 5000 | 5000 | 0 | 49770.8 | 9.95416 | 8.8 | 6.39978 | ... | 22.2 | 29.2 | 43.1 | 8.5 | 29.1535 | -9.24519 | -7.65 | 26.35 | True | True |
| creddebt | 0 | float64 | 4950 | 5000 | 5000 | 0 | 9286.63 | 1.85733 | 0.926437 | 3.41573 | ... | 6.37301 | 14.2804 | 109.073 | 1.6783 | 12.1045 | -8.38987 | -2.13193 | 4.58127 | True | True |
| lncreddebt | 0 | float64 | 4919 | 5000 | 5000 | 0 | 4004.84 | 0.800968 | 0.655672 | 0.611692 | ... | 1.99783 | 2.72657 | 4.70114 | 0.793587 | 2.63604 | -1.03411 | -0.864306 | 2.31004 | True | True |
| othdebt | 0 | float64 | 4973 | 5000 | 5000 | 0 | 18272.3 | 3.65446 | 2.09854 | 5.39517 | ... | 11.816 | 24.0643 | 141.459 | 3.33448 | 19.84 | -12.5311 | -4.02142 | 9.3165 | True | True |
| lnothdebt | 0 | float64 | 4964 | 5000 | 5000 | 0 | 6153.78 | 1.23076 | 1.13093 | 0.714314 | ... | 2.55069 | 3.22144 | 4.95906 | 0.987243 | 3.3737 | -0.912186 | -0.797615 | 3.15136 | True | True |
| default | 0 | int64 | 2 | 5000 | 5000 | 0 | 1171 | 0.2342 | 0 | 0.42354 | ... | 1 | 1 | 1 | 0 | 1.50482 | -1.03642 | 0 | 0 | False | True |
| jobsat | 0 | int64 | 5 | 5000 | 5000 | 0 | 14821 | 2.9642 | 3 | 1.37946 | ... | 5 | 5 | 5 | 2 | 7.10257 | -1.17417 | -1 | 7 | False | False |
| marital | 0 | int64 | 2 | 5000 | 5000 | 0 | 2401 | 0.4802 | 0 | 0.499658 | ... | 1 | 1 | 1 | 1 | 1.97917 | -1.01877 | -1.5 | 2.5 | False | False |
| spoused | 0 | int64 | 22 | 5000 | 5000 | 0 | 30564 | 6.1128 | -1 | 7.74352 | ... | 18 | 20 | 24 | 15 | 29.3434 | -17.1178 | -23.5 | 36.5 | False | False |
| spousedcat | 0 | int64 | 6 | 5000 | 5000 | 0 | 3207 | 0.6414 | -1 | 1.88677 | ... | 4 | 5 | 5 | 3 | 6.30172 | -5.01892 | -5.5 | 6.5 | False | False |
| reside | 0 | int64 | 9 | 5000 | 5000 | 0 | 11020 | 2.204 | 2 | 1.39398 | ... | 5 | 6 | 9 | 2 | 6.38593 | -1.97793 | -2 | 6 | True | True |
| pets | 0 | int64 | 20 | 5000 | 5000 | 0 | 15337 | 3.0674 | 2 | 3.4145 | ... | 10 | 13 | 21 | 5 | 13.3109 | -7.17609 | -7.5 | 12.5 | True | True |
| pets_cats | 0 | int64 | 7 | 5000 | 5000 | 0 | 2502 | 0.5004 | 0 | 0.860783 | ... | 2 | 3 | 6 | 1 | 3.08275 | -2.08195 | -1.5 | 2.5 | True | True |
| pets_dogs | 0 | int64 | 7 | 5000 | 5000 | 0 | 1962 | 0.3924 | 0 | 0.796084 | ... | 2 | 3 | 7 | 0 | 2.78065 | -1.99585 | 0 | 0 | True | True |
| pets_birds | 0 | int64 | 6 | 5000 | 5000 | 0 | 552 | 0.1104 | 0 | 0.494227 | ... | 1 | 3 | 5 | 0 | 1.59308 | -1.37228 | 0 | 0 | True | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| cardten | 0 | float64 | 698 | 5000 | 5000 | 0 | 3.60239e+06 | 720.478 | 425 | 922.041 | ... | 2455.25 | 4010.4 | 13705 | 1080 | 3486.6 | -2045.64 | -1620 | 2700 | True | True |
| lncardten | 0 | float64 | 698 | 5000 | 5000 | 0 | 23023.6 | 4.60473 | 6.05444 | 3.06228 | ... | 7.80639 | 8.2969 | 9.52559 | 6.98564 | 13.7916 | -4.58211 | -10.4785 | 17.4641 | False | False |
| wireless | 0 | int64 | 2 | 5000 | 5000 | 0 | 1344 | 0.2688 | 0 | 0.44338 | ... | 1 | 1 | 1 | 1 | 1.59894 | -1.06134 | -1.5 | 2.5 | False | False |
| wiremon | 0 | float64 | 746 | 5000 | 5000 | 0 | 53505.9 | 10.7012 | 0 | 19.7998 | ... | 51.305 | 78.304 | 186.25 | 20.9625 | 70.1007 | -48.6983 | -31.4437 | 52.4062 | True | True |
| lnwiremon | 0 | float64 | 746 | 5000 | 5000 | 0 | 4883.8 | 0.976759 | 0 | 1.62312 | ... | 3.95709 | 4.37329 | 5.23244 | 3.08934 | 5.84611 | -3.89259 | -4.634 | 7.72334 | False | False |
| wireten | 0 | float64 | 1328 | 5000 | 5000 | 0 | 2.10992e+06 | 421.985 | 0 | 1001 | ... | 2687.92 | 4530.19 | 12858.6 | 89.9625 | 3424.99 | -2581.03 | -134.944 | 224.906 | True | True |
| lnwireten | 0 | float64 | 1328 | 5000 | 5000 | 0 | 9154.7 | 1.83094 | 0 | 3.09186 | ... | 7.8969 | 8.41874 | 9.46185 | 4.51042 | 11.1065 | -7.44462 | -6.76564 | 11.2761 | False | False |
| multline | 0 | int64 | 2 | 5000 | 5000 | 0 | 2442 | 0.4884 | 0 | 0.499915 | ... | 1 | 1 | 1 | 1 | 1.98815 | -1.01135 | -1.5 | 2.5 | False | False |
| voice | 0 | int64 | 2 | 5000 | 5000 | 0 | 1515 | 0.303 | 0 | 0.459601 | ... | 1 | 1 | 1 | 1 | 1.6818 | -1.0758 | -1.5 | 2.5 | False | False |
| pager | 0 | int64 | 2 | 5000 | 5000 | 0 | 1218 | 0.2436 | 0 | 0.429297 | ... | 1 | 1 | 1 | 0 | 1.53149 | -1.04429 | 0 | 0 | False | True |
| internet | 0 | int64 | 5 | 5000 | 5000 | 0 | 5998 | 1.1996 | 1 | 1.44934 | ... | 4 | 4 | 4 | 2 | 5.54761 | -3.14841 | -3 | 5 | False | False |
| callid | 0 | int64 | 2 | 5000 | 5000 | 0 | 2376 | 0.4752 | 0 | 0.499435 | ... | 1 | 1 | 1 | 1 | 1.9735 | -1.0231 | -1.5 | 2.5 | False | False |
| callwait | 0 | int64 | 2 | 5000 | 5000 | 0 | 2395 | 0.479 | 0 | 0.499609 | ... | 1 | 1 | 1 | 1 | 1.97783 | -1.01983 | -1.5 | 2.5 | False | False |
| forward | 0 | int64 | 2 | 5000 | 5000 | 0 | 2403 | 0.4806 | 0 | 0.499673 | ... | 1 | 1 | 1 | 1 | 1.97962 | -1.01842 | -1.5 | 2.5 | False | False |
| confer | 0 | int64 | 2 | 5000 | 5000 | 0 | 2390 | 0.478 | 0 | 0.499566 | ... | 1 | 1 | 1 | 1 | 1.9767 | -1.0207 | -1.5 | 2.5 | False | False |
| ebill | 0 | int64 | 2 | 5000 | 5000 | 0 | 1743 | 0.3486 | 0 | 0.476575 | ... | 1 | 1 | 1 | 1 | 1.77832 | -1.08112 | -1.5 | 2.5 | False | False |
| owntv | 0 | int64 | 2 | 5000 | 5000 | 0 | 4915 | 0.983 | 1 | 0.129284 | ... | 1 | 1 | 1 | 0 | 1.37085 | 0.595148 | 1 | 1 | True | True |
| hourstv | 0 | int64 | 32 | 5000 | 5000 | 0 | 98225 | 19.645 | 20 | 5.16561 | ... | 28 | 31 | 36 | 6 | 35.1418 | 4.14817 | 8 | 32 | True | True |
| ownvcr | 0 | int64 | 2 | 5000 | 5000 | 0 | 4578 | 0.9156 | 1 | 0.278015 | ... | 1 | 1 | 1 | 0 | 1.74964 | 0.0815563 | 1 | 1 | True | True |
| owndvd | 0 | int64 | 2 | 5000 | 5000 | 0 | 4568 | 0.9136 | 1 | 0.280982 | ... | 1 | 1 | 1 | 0 | 1.75655 | 0.0706543 | 1 | 1 | True | True |
| owncd | 0 | int64 | 2 | 5000 | 5000 | 0 | 4664 | 0.9328 | 1 | 0.250393 | ... | 1 | 1 | 1 | 0 | 1.68398 | 0.181621 | 1 | 1 | True | True |
| ownpda | 0 | int64 | 2 | 5000 | 5000 | 0 | 1005 | 0.201 | 0 | 0.400788 | ... | 1 | 1 | 1 | 0 | 1.40336 | -1.00136 | 0 | 0 | False | True |
| ownpc | 0 | int64 | 2 | 5000 | 5000 | 0 | 3164 | 0.6328 | 1 | 0.48209 | ... | 1 | 1 | 1 | 1 | 2.07907 | -0.81347 | -1.5 | 2.5 | False | False |
| ownipod | 0 | int64 | 2 | 5000 | 5000 | 0 | 2396 | 0.4792 | 0 | 0.499617 | ... | 1 | 1 | 1 | 1 | 1.97805 | -1.01965 | -1.5 | 2.5 | False | False |
| owngame | 0 | int64 | 2 | 5000 | 5000 | 0 | 2374 | 0.4748 | 0 | 0.499415 | ... | 1 | 1 | 1 | 1 | 1.97304 | -1.02344 | -1.5 | 2.5 | False | False |
| ownfax | 0 | int64 | 2 | 5000 | 5000 | 0 | 894 | 0.1788 | 0 | 0.383223 | ... | 1 | 1 | 1 | 0 | 1.32847 | -0.970869 | 0 | 0 | False | True |
| news | 0 | int64 | 2 | 5000 | 5000 | 0 | 2363 | 0.4726 | 0 | 0.499299 | ... | 1 | 1 | 1 | 1 | 1.9705 | -1.0253 | -1.5 | 2.5 | False | False |
| response_01 | 0 | int64 | 2 | 5000 | 5000 | 0 | 418 | 0.0836 | 0 | 0.276815 | ... | 1 | 1 | 1 | 0 | 0.914044 | -0.746844 | 0 | 0 | True | True |
| response_02 | 0 | int64 | 2 | 5000 | 5000 | 0 | 649 | 0.1298 | 0 | 0.336117 | ... | 1 | 1 | 1 | 0 | 1.13815 | -0.878551 | 0 | 0 | False | True |
| response_03 | 0 | int64 | 2 | 5000 | 5000 | 0 | 513 | 0.1026 | 0 | 0.303466 | ... | 1 | 1 | 1 | 0 | 1.013 | -0.807798 | 0 | 0 | False | True |
130 rows × 29 columns
# Cap every column at its own 1st and 99th percentiles.
dataset = dataset.apply(
    lambda column: column.clip(column.quantile(0.01), column.quantile(0.99))
)

# Combine spend on both cards into one target column and drop the two parts.
dataset['total_spent'] = dataset['cardspent'] + dataset['card2spent']
dataset.drop(columns=['cardspent', 'card2spent'], inplace=True)

# Histogram of the combined spend.
dataset['total_spent'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1a58caa2940>
The distribution of 'total_spent' is not approximately normal, so we transform it towards a normal shape by taking its log.
# Replace the spend total with its natural log, then plot the new distribution.
dataset['total_spent'] = np.log(dataset.total_spent)
dataset.total_spent.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1a58e7b2f28>
# Names of the columns that take exactly two distinct values.
distinct_counts = dataset.apply(lambda column: len(column.unique()))
binary = distinct_counts[distinct_counts == 2].index
binary
Index(['gender', 'union', 'retire', 'default', 'marital', 'homeown', 'carbuy',
'commutecar', 'commutemotorcycle', 'commutecarpool', 'commutebus',
'commuterail', 'commutepublic', 'commutebike', 'commutewalk',
'commutenonmotor', 'telecommute', 'polparty', 'polcontrib', 'vote',
'cardfee', 'card2fee', 'active', 'churn', 'tollfree', 'equip',
'callcard', 'wireless', 'multline', 'voice', 'pager', 'callid',
'callwait', 'forward', 'confer', 'ebill', 'owntv', 'ownvcr', 'owndvd',
'owncd', 'ownpda', 'ownpc', 'ownipod', 'owngame', 'ownfax', 'news',
'response_01', 'response_02', 'response_03'],
dtype='object')
# Plot every non-binary column against the log spend target.
df = dataset.drop(columns=binary)
warnings.filterwarnings('ignore')
# The figure styling does not change between iterations, so set it once.
sns.set(rc={'figure.figsize': (11.7, 5)})
for column_name in df.columns:
    # jointplot is a figure-level function that builds its own figure, so the
    # former plt.subplot(...) call only produced a stray empty axes and has
    # been removed. x/y/data are passed by keyword because recent seaborn
    # releases no longer accept them positionally.
    p = sns.jointplot(x=column_name, y='total_spent', data=df)
# Correlation of every column with the target. The column is selected by name
# rather than by position so the result does not depend on 'total_spent'
# being the last column of the frame.
dataset.corr()[['total_spent']]
| total_spent | |
|---|---|
| region | 0.039421 |
| townsize | 0.007129 |
| gender | -0.078772 |
| age | 0.005796 |
| agecat | 0.029055 |
| ed | 0.098166 |
| edcat | 0.094562 |
| jobcat | -0.011747 |
| union | 0.021709 |
| employ | 0.067825 |
| empcat | 0.095660 |
| retire | -0.199619 |
| income | 0.355692 |
| lninc | 0.405218 |
| inccat | 0.381289 |
| debtinc | 0.015695 |
| creddebt | 0.232934 |
| lncreddebt | 0.259732 |
| othdebt | 0.260580 |
| lnothdebt | 0.284354 |
| default | 0.018402 |
| jobsat | 0.065629 |
| marital | 0.017848 |
| spoused | 0.030032 |
| spousedcat | 0.037413 |
| reside | 0.003676 |
| pets | -0.005194 |
| pets_cats | 0.001742 |
| pets_dogs | 0.007085 |
| pets_birds | -0.018450 |
| ... | ... |
| lncardten | 0.029419 |
| wireless | 0.073661 |
| wiremon | 0.086594 |
| lnwiremon | 0.078608 |
| wireten | 0.096763 |
| lnwireten | 0.084878 |
| multline | 0.044064 |
| voice | 0.044857 |
| pager | 0.057203 |
| internet | 0.074008 |
| callid | 0.054924 |
| callwait | 0.056093 |
| forward | 0.051178 |
| confer | 0.054691 |
| ebill | 0.032282 |
| owntv | 0.084374 |
| hourstv | 0.044345 |
| ownvcr | 0.157676 |
| owndvd | 0.164473 |
| owncd | 0.148373 |
| ownpda | 0.069264 |
| ownpc | 0.044011 |
| ownipod | 0.041507 |
| owngame | 0.042280 |
| ownfax | 0.062556 |
| news | 0.028815 |
| response_01 | -0.008875 |
| response_02 | 0.027170 |
| response_03 | 0.061761 |
| total_spent | 1.000000 |
129 rows × 1 columns
# Show the keys of num_var (presumably the variable-name mapping built in an
# earlier cell -- TODO confirm where num_var is defined).
num_var.keys()
dict_keys(['custid', 'region', 'townsize', 'gender', 'age', 'agecat', 'birthmonth', 'ed', 'edcat', 'jobcat', 'union', 'employ', 'empcat', 'retire', 'income', 'lninc', 'inccat', 'debtinc', 'creddebt', 'lncreddebt', 'othdebt', 'lnothdebt', 'default', 'jobsat', 'marital', 'spoused', 'spousedcat', 'reside', 'pets', 'pets_cats', 'pets_dogs', 'pets_birds', 'pets_reptiles', 'pets_small', 'pets_saltfish', 'pets_freshfish', 'homeown', 'hometype', 'address', 'addresscat', 'cars', 'carown', 'cartype', 'carvalue', 'carcatvalue', 'carbought', 'carbuy', 'commute', 'commutecat', 'commutetime', 'commutecar', 'commutemotorcycle', 'commutecarpool', 'commutebus', 'commuterail', 'commutepublic', 'commutebike', 'commutewalk', 'commutenonmotor', 'telecommute', 'reason', 'polview', 'polparty', 'polcontrib', 'vote', 'card', 'cardtype', 'cardbenefit', 'cardfee', 'cardtenure', 'cardtenurecat', 'card2', 'card2type', 'card2benefit', 'card2fee', 'card2tenure', 'card2tenurecat', 'carditems', 'cardspent', 'card2items', 'card2spent', 'active', 'bfast', 'tenure', 'churn', 'longmon', 'lnlongmon', 'longten', 'lnlongten', 'tollfree', 'tollmon', 'lntollmon', 'tollten', 'lntollten', 'equip', 'equipmon', 'lnequipmon', 'equipten', 'lnequipten', 'callcard', 'cardmon', 'lncardmon', 'cardten', 'lncardten', 'wireless', 'wiremon', 'lnwiremon', 'wireten', 'lnwireten', 'multline', 'voice', 'pager', 'internet', 'callid', 'callwait', 'forward', 'confer', 'ebill', 'owntv', 'hourstv', 'ownvcr', 'owndvd', 'owncd', 'ownpda', 'ownpc', 'ownipod', 'owngame', 'ownfax', 'news', 'response_01', 'response_02', 'response_03', nan])
As we can see, there are two cards, each described by the same set of columns, so we can combine each matching pair of columns into a single column.
# Add the primary-card and secondary-card versions of each measure into one
# 'total_*' column, then remove the per-card originals.
# NOTE(review): this sums the benefit and fee columns as plain numbers; if they
# hold category codes the totals may not be meaningful -- confirm.
card_measures = ('benefit', 'fee', 'tenure', 'items')
for measure in card_measures:
    dataset['total_' + measure] = dataset['card' + measure] + dataset['card2' + measure]

per_card_columns = [
    'cardbenefit', 'card2benefit',
    'carditems', 'card2items',
    'cardtenure', 'card2tenure',
    'cardfee', 'card2fee',
]
dataset.drop(columns=per_card_columns, inplace=True)
1> Linearity and normality:
The relationship between X and Y should be linear, and the residuals should be approximately normally distributed
Check using:
a) QQ plot
b) Histogram of residuals
2> No multicollinearity:
Predictor variables should not be highly correlated with one another
Correction:
Drop variables with high correlation
3> No autocorrelation:
Residuals should not be correlated with one another
Check using:
a) Scatterplot of the residuals
Correction:
Adjust the coefficient standard errors using a transformation
4> No heteroscedasticity:
The variance of the residuals should stay constant rather than increase as X increases
Check using:
a) Scatterplot
Correction:
Calculate robust standard errors and use them to recalculate the t-statistics
import statsmodels.formula.api as smf

# Regress total_spent on every other column of the frame, building the
# formula string from the column names.
predictor_names = dataset.columns.difference(['total_spent'])
all_columns = "+".join(predictor_names)
formula = 'total_spent~{}'.format(all_columns)
lm = smf.ols(formula=formula, data=dataset).fit()
lm.summary()
| Dep. Variable: | total_spent | R-squared: | 0.645 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.635 |
| Method: | Least Squares | F-statistic: | 71.27 |
| Date: | Tue, 12 Mar 2019 | Prob (F-statistic): | 0.00 |
| Time: | 19:50:44 | Log-Likelihood: | -2314.1 |
| No. Observations: | 5000 | AIC: | 4878. |
| Df Residuals: | 4875 | BIC: | 5693. |
| Df Model: | 124 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 4.2792 | 0.142 | 30.195 | 0.000 | 4.001 | 4.557 |
| active | 0.0050 | 0.012 | 0.423 | 0.672 | -0.018 | 0.028 |
| address | 7.384e-05 | 0.001 | 0.052 | 0.958 | -0.003 | 0.003 |
| addresscat | -0.0115 | 0.013 | -0.854 | 0.393 | -0.038 | 0.015 |
| age | -0.0034 | 0.002 | -2.213 | 0.027 | -0.006 | -0.000 |
| agecat | 0.0408 | 0.019 | 2.168 | 0.030 | 0.004 | 0.078 |
| bfast | 0.0030 | 0.007 | 0.436 | 0.663 | -0.011 | 0.017 |
| callcard | 0.1632 | 0.092 | 1.769 | 0.077 | -0.018 | 0.344 |
| callid | 0.0175 | 0.017 | 1.052 | 0.293 | -0.015 | 0.050 |
| callwait | -0.0068 | 0.016 | -0.412 | 0.680 | -0.039 | 0.025 |
| carbought | -0.0014 | 0.012 | -0.110 | 0.912 | -0.026 | 0.023 |
| carbuy | 0.0136 | 0.012 | 1.128 | 0.259 | -0.010 | 0.037 |
| carcatvalue | -0.0035 | 0.015 | -0.230 | 0.818 | -0.033 | 0.026 |
| card | -0.1282 | 0.005 | -25.008 | 0.000 | -0.138 | -0.118 |
| card2 | -0.0680 | 0.005 | -13.137 | 0.000 | -0.078 | -0.058 |
| card2tenurecat | 0.0016 | 0.014 | 0.117 | 0.907 | -0.026 | 0.029 |
| card2type | 0.0059 | 0.005 | 1.193 | 0.233 | -0.004 | 0.016 |
| cardmon | 0.0017 | 0.002 | 0.742 | 0.458 | -0.003 | 0.006 |
| cardten | 4.054e-05 | 2.59e-05 | 1.566 | 0.117 | -1.02e-05 | 9.13e-05 |
| cardtenurecat | -0.0136 | 0.013 | -1.043 | 0.297 | -0.039 | 0.012 |
| cardtype | 0.0053 | 0.005 | 1.067 | 0.286 | -0.004 | 0.015 |
| carown | 0.0242 | 0.015 | 1.653 | 0.098 | -0.005 | 0.053 |
| cars | 0.0052 | 0.006 | 0.812 | 0.417 | -0.007 | 0.018 |
| cartype | -0.0138 | 0.011 | -1.225 | 0.221 | -0.036 | 0.008 |
| carvalue | -0.0009 | 0.001 | -1.003 | 0.316 | -0.003 | 0.001 |
| churn | 0.0252 | 0.015 | 1.683 | 0.092 | -0.004 | 0.055 |
| commute | 0.0102 | 0.012 | 0.843 | 0.399 | -0.014 | 0.034 |
| commutebike | -0.0019 | 0.017 | -0.107 | 0.914 | -0.036 | 0.032 |
| commutebus | -0.0055 | 0.013 | -0.435 | 0.664 | -0.030 | 0.019 |
| commutecar | 0.0062 | 0.019 | 0.326 | 0.744 | -0.031 | 0.043 |
| commutecarpool | 0.0062 | 0.013 | 0.492 | 0.623 | -0.019 | 0.031 |
| commutecat | -0.0172 | 0.028 | -0.617 | 0.537 | -0.072 | 0.037 |
| commutemotorcycle | -0.0069 | 0.018 | -0.376 | 0.707 | -0.043 | 0.029 |
| commutenonmotor | -0.0292 | 0.025 | -1.170 | 0.242 | -0.078 | 0.020 |
| commutepublic | -0.0025 | 0.019 | -0.132 | 0.895 | -0.040 | 0.035 |
| commuterail | -0.0197 | 0.013 | -1.562 | 0.118 | -0.045 | 0.005 |
| commutetime | -0.0001 | 0.001 | -0.094 | 0.925 | -0.002 | 0.002 |
| commutewalk | -0.0301 | 0.013 | -2.408 | 0.016 | -0.055 | -0.006 |
| confer | -0.0003 | 0.017 | -0.016 | 0.988 | -0.033 | 0.033 |
| creddebt | 0.0005 | 0.007 | 0.065 | 0.948 | -0.013 | 0.014 |
| debtinc | 0.0010 | 0.002 | 0.409 | 0.683 | -0.004 | 0.006 |
| default | 0.0062 | 0.016 | 0.385 | 0.701 | -0.025 | 0.038 |
| ebill | 0.0115 | 0.016 | 0.721 | 0.471 | -0.020 | 0.043 |
| ed | -0.0040 | 0.007 | -0.574 | 0.566 | -0.018 | 0.010 |
| edcat | -0.0024 | 0.018 | -0.132 | 0.895 | -0.038 | 0.033 |
| empcat | -0.0012 | 0.011 | -0.108 | 0.914 | -0.022 | 0.020 |
| employ | 0.0005 | 0.002 | 0.290 | 0.772 | -0.003 | 0.004 |
| equip | 0.2308 | 0.695 | 0.332 | 0.740 | -1.131 | 1.593 |
| equipmon | 0.0027 | 0.008 | 0.353 | 0.724 | -0.012 | 0.018 |
| equipten | 3.802e-05 | 2.98e-05 | 1.276 | 0.202 | -2.04e-05 | 9.64e-05 |
| forward | -0.0021 | 0.016 | -0.127 | 0.899 | -0.034 | 0.030 |
| gender | -0.0553 | 0.011 | -4.940 | 0.000 | -0.077 | -0.033 |
| homeown | 0.0034 | 0.012 | 0.281 | 0.779 | -0.020 | 0.027 |
| hometype | 0.0071 | 0.006 | 1.168 | 0.243 | -0.005 | 0.019 |
| hourstv | -0.0003 | 0.001 | -0.201 | 0.841 | -0.003 | 0.002 |
| inccat | 0.0182 | 0.017 | 1.081 | 0.280 | -0.015 | 0.051 |
| income | 0.0007 | 0.000 | 1.566 | 0.117 | -0.000 | 0.002 |
| internet | 0.0069 | 0.006 | 1.155 | 0.248 | -0.005 | 0.019 |
| jobcat | -0.0069 | 0.004 | -1.756 | 0.079 | -0.015 | 0.001 |
| jobsat | -0.0048 | 0.005 | -1.004 | 0.315 | -0.014 | 0.005 |
| lncardmon | -0.0677 | 0.046 | -1.464 | 0.143 | -0.158 | 0.023 |
| lncardten | -0.0097 | 0.016 | -0.608 | 0.543 | -0.041 | 0.021 |
| lncreddebt | 0.0064 | 0.031 | 0.202 | 0.840 | -0.055 | 0.068 |
| lnequipmon | -0.0672 | 0.275 | -0.244 | 0.807 | -0.607 | 0.473 |
| lnequipten | -0.0191 | 0.022 | -0.856 | 0.392 | -0.063 | 0.025 |
| lninc | 0.2709 | 0.039 | 6.981 | 0.000 | 0.195 | 0.347 |
| lnlongmon | -0.0319 | 0.045 | -0.706 | 0.480 | -0.121 | 0.057 |
| lnlongten | 8.113e-05 | 0.022 | 0.004 | 0.997 | -0.043 | 0.043 |
| lnothdebt | -0.0384 | 0.031 | -1.247 | 0.213 | -0.099 | 0.022 |
| lntollmon | 0.0652 | 0.126 | 0.516 | 0.606 | -0.183 | 0.313 |
| lntollten | -0.0063 | 0.022 | -0.281 | 0.779 | -0.050 | 0.038 |
| lnwiremon | 0.0696 | 0.170 | 0.408 | 0.683 | -0.265 | 0.404 |
| lnwireten | 0.0199 | 0.024 | 0.816 | 0.414 | -0.028 | 0.068 |
| longmon | 0.0072 | 0.006 | 1.181 | 0.238 | -0.005 | 0.019 |
| longten | -9.203e-05 | 7.18e-05 | -1.282 | 0.200 | -0.000 | 4.87e-05 |
| marital | 0.1004 | 0.058 | 1.728 | 0.084 | -0.013 | 0.214 |
| multline | -0.0226 | 0.015 | -1.558 | 0.119 | -0.051 | 0.006 |
| news | 0.0009 | 0.014 | 0.060 | 0.952 | -0.027 | 0.028 |
| othdebt | 0.0046 | 0.004 | 1.239 | 0.215 | -0.003 | 0.012 |
| owncd | 0.0150 | 0.028 | 0.535 | 0.593 | -0.040 | 0.070 |
| owndvd | 0.0073 | 0.025 | 0.289 | 0.773 | -0.042 | 0.057 |
| ownfax | 0.0028 | 0.019 | 0.147 | 0.883 | -0.035 | 0.040 |
| owngame | -0.0169 | 0.014 | -1.236 | 0.217 | -0.044 | 0.010 |
| ownipod | -0.0104 | 0.013 | -0.771 | 0.441 | -0.037 | 0.016 |
| ownpc | 0.0206 | 0.016 | 1.301 | 0.193 | -0.010 | 0.052 |
| ownpda | 0.0191 | 0.018 | 1.043 | 0.297 | -0.017 | 0.055 |
| owntv | -0.0618 | 0.056 | -1.110 | 0.267 | -0.171 | 0.047 |
| ownvcr | 0.0082 | 0.026 | 0.318 | 0.750 | -0.042 | 0.058 |
| pager | -0.0058 | 0.020 | -0.294 | 0.769 | -0.044 | 0.033 |
| pets | 0.0063 | 0.017 | 0.365 | 0.715 | -0.027 | 0.040 |
| pets_birds | -0.0247 | 0.021 | -1.154 | 0.249 | -0.067 | 0.017 |
| pets_cats | 0.0021 | 0.019 | 0.113 | 0.910 | -0.035 | 0.039 |
| pets_dogs | -0.0057 | 0.019 | -0.300 | 0.764 | -0.043 | 0.032 |
| pets_freshfish | -0.0056 | 0.017 | -0.327 | 0.744 | -0.039 | 0.028 |
| pets_reptiles | 0.0317 | 0.028 | 1.144 | 0.252 | -0.023 | 0.086 |
| pets_saltfish | -0.0228 | 0.041 | -0.551 | 0.582 | -0.104 | 0.058 |
| pets_small | -0.0009 | 0.022 | -0.041 | 0.967 | -0.045 | 0.043 |
| polcontrib | 0.0090 | 0.013 | 0.682 | 0.495 | -0.017 | 0.035 |
| polparty | 0.0013 | 0.012 | 0.116 | 0.908 | -0.021 | 0.024 |
| polview | 0.0035 | 0.004 | 0.859 | 0.390 | -0.004 | 0.012 |
| reason | -0.0012 | 0.002 | -0.621 | 0.535 | -0.005 | 0.003 |
| region | 0.0068 | 0.004 | 1.573 | 0.116 | -0.002 | 0.015 |
| reside | 0.0004 | 0.006 | 0.074 | 0.941 | -0.011 | 0.012 |
| response_01 | -0.0177 | 0.020 | -0.870 | 0.384 | -0.058 | 0.022 |
| response_02 | -0.0006 | 0.017 | -0.038 | 0.970 | -0.033 | 0.032 |
| response_03 | 0.0410 | 0.019 | 2.204 | 0.028 | 0.005 | 0.077 |
| retire | 0.0378 | 0.029 | 1.312 | 0.190 | -0.019 | 0.094 |
| spoused | -0.0161 | 0.008 | -1.966 | 0.049 | -0.032 | -4.49e-05 |
| spousedcat | 0.0424 | 0.022 | 1.897 | 0.058 | -0.001 | 0.086 |
| telecommute | 0.0055 | 0.015 | 0.382 | 0.703 | -0.023 | 0.034 |
| tenure | 0.0009 | 0.001 | 0.611 | 0.541 | -0.002 | 0.004 |
| tollfree | -0.0977 | 0.242 | -0.404 | 0.686 | -0.572 | 0.377 |
| tollmon | -0.0022 | 0.005 | -0.455 | 0.649 | -0.012 | 0.007 |
| tollten | -1.189e-05 | 3.27e-05 | -0.364 | 0.716 | -7.59e-05 | 5.21e-05 |
| total_benefit | -0.0031 | 0.004 | -0.882 | 0.378 | -0.010 | 0.004 |
| total_fee | -0.0047 | 0.010 | -0.470 | 0.638 | -0.024 | 0.015 |
| total_items | 0.0931 | 0.001 | 71.585 | 0.000 | 0.091 | 0.096 |
| total_tenure | 0.0007 | 0.001 | 0.531 | 0.595 | -0.002 | 0.003 |
| townsize | -0.0013 | 0.005 | -0.254 | 0.799 | -0.012 | 0.009 |
| union | 0.0128 | 0.016 | 0.819 | 0.413 | -0.018 | 0.043 |
| voice | -0.0353 | 0.018 | -1.979 | 0.048 | -0.070 | -0.000 |
| vote | 0.0007 | 0.011 | 0.060 | 0.952 | -0.022 | 0.023 |
| wireless | -0.2256 | 0.405 | -0.556 | 0.578 | -1.021 | 0.569 |
| wiremon | -0.0036 | 0.004 | -0.795 | 0.427 | -0.012 | 0.005 |
| wireten | -1.557e-06 | 3.05e-05 | -0.051 | 0.959 | -6.14e-05 | 5.83e-05 |
| Omnibus: | 40.356 | Durbin-Watson: | 1.970 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 41.185 |
| Skew: | 0.222 | Prob(JB): | 1.14e-09 |
| Kurtosis: | 3.015 | Cond. No. | 2.62e+05 |
# Install an "ignore" filter so warnings raised from this point on are not
# printed into the (already very long) report output.
warnings.filterwarnings('ignore')
# Third-party profiling package, imported at its point of use in this cell.
import pandas_profiling
# Build a profiling report over `dataset`, which is defined in an earlier cell
# (presumably the prepared customer DataFrame — verify against preceding cells).
profile= pandas_profiling.ProfileReport(dataset)
# Bare expression as the cell's last statement: in a notebook this displays the report.
profile
Dataset info
| Number of variables | 125 |
|---|---|
| Number of observations | 5000 |
| Total Missing (%) | 0.0% |
| Total size in memory | 4.8 MiB |
| Average record size in memory | 1000.0 B |
Variables types
| Numeric | 54 |
|---|---|
| Categorical | 0 |
| Boolean | 47 |
| Date | 0 |
| Text (Unique) | 0 |
| Rejected | 24 |
| Unsupported | 0 |
Warnings
- address has 245 / 4.9% zeros [Zeros]
- addresscat is highly correlated with address (ρ = 0.92717) [Rejected]
- agecat is highly correlated with age (ρ = 0.96988) [Rejected]
- carbought has 2901 / 58.0% zeros [Zeros]
- card2tenurecat is highly correlated with cardtenurecat (ρ = 0.92431) [Rejected]
- cardmon has 1419 / 28.4% zeros [Zeros]
- cardten has 1420 / 28.4% zeros [Zeros]
- carown has 799 / 16.0% zeros [Zeros]
- cars has 497 / 9.9% zeros [Zeros]
- cartype has 2287 / 45.7% zeros [Zeros]
- commutecat is highly correlated with commute (ρ = 0.98117) [Rejected]
- edcat is highly correlated with ed (ρ = 0.96649) [Rejected]
- empcat is highly correlated with employ (ρ = 0.90359) [Rejected]
- employ has 659 / 13.2% zeros [Zeros]
- equipmon is highly correlated with equip (ρ = 0.94719) [Rejected]
- equipten has 3296 / 65.9% zeros [Zeros]
- hourstv has 85 / 1.7% zeros [Zeros]
- inccat is highly correlated with lninc (ρ = 0.95154) [Rejected]
- internet has 2498 / 50.0% zeros [Zeros]
- lncardmon is highly correlated with callcard (ρ = 0.94918) [Rejected]
- lncardten is highly correlated with lncardmon (ρ = 0.96006) [Rejected]
- lncreddebt is highly correlated with creddebt (ρ = 0.92026) [Rejected]
- lnequipmon is highly correlated with equipmon (ρ = 0.97195) [Rejected]
- lnequipten is highly correlated with lnequipmon (ρ = 0.98331) [Rejected]
- lnlongten is highly correlated with lnlongmon (ρ = 0.92481) [Rejected]
- lnothdebt is highly correlated with othdebt (ρ = 0.90163) [Rejected]
- lntollmon is highly correlated with tollmon (ρ = 0.93703) [Rejected]
- lntollten is highly correlated with lntollmon (ρ = 0.98558) [Rejected]
- lnwiremon is highly correlated with wiremon (ρ = 0.95325) [Rejected]
- lnwireten is highly correlated with lnwiremon (ρ = 0.98726) [Rejected]
- longten is highly correlated with longmon (ρ = 0.98281) [Rejected]
- pets has 1529 / 30.6% zeros [Zeros]
- pets_birds has 4698 / 94.0% zeros [Zeros]
- pets_cats has 3413 / 68.3% zeros [Zeros]
- pets_dogs has 3762 / 75.2% zeros [Zeros]
- pets_freshfish has 3462 / 69.2% zeros [Zeros]
- pets_reptiles has 4818 / 96.4% zeros [Zeros]
- pets_saltfish has 4942 / 98.8% zeros [Zeros]
- pets_small has 4749 / 95.0% zeros [Zeros]
- spoused is highly correlated with marital (ρ = 0.95763) [Rejected]
- spousedcat is highly correlated with spoused (ρ = 0.98315) [Rejected]
- tollmon has 2622 / 52.4% zeros [Zeros]
- tollten has 2622 / 52.4% zeros [Zeros]
- total_fee has 3294 / 65.9% zeros [Zeros]
- total_tenure is highly correlated with tenure (ρ = 0.92561) [Rejected]
- wiremon is highly correlated with wireless (ρ = 0.91316) [Rejected]
- wireten has 3656 / 73.1% zeros [Zeros]
active
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.466 |
|---|
| 0.0 |
2670
|
|---|---|
| 1.0 |
2330
|
| Value | Count | Frequency (%) | |
| 0.0 | 2670 | 53.4% |
|
| 1.0 | 2330 | 46.6% |
|
address
Numeric
| Distinct count | 49 |
|---|---|
| Unique (%) | 1.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 16.366 |
|---|---|
| Minimum | 0 |
| Maximum | 48 |
| Zeros (%) | 4.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 6 |
| Median | 14 |
| Q3 | 25 |
| 95-th percentile | 40 |
| Maximum | 48 |
| Range | 48 |
| Interquartile range | 19 |
Descriptive statistics
| Standard deviation | 12.298 |
|---|---|
| Coef of variation | 0.75141 |
| Kurtosis | -0.38457 |
| Mean | 16.366 |
| MAD | 10.183 |
| Skewness | 0.66468 |
| Sum | 81832 |
| Variance | 151.24 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 245 | 4.9% |
|
| 2.0 | 196 | 3.9% |
|
| 4.0 | 195 | 3.9% |
|
| 5.0 | 177 | 3.5% |
|
| 3.0 | 172 | 3.4% |
|
| 8.0 | 169 | 3.4% |
|
| 1.0 | 169 | 3.4% |
|
| 7.0 | 166 | 3.3% |
|
| 12.0 | 166 | 3.3% |
|
| 6.0 | 163 | 3.3% |
|
| Other values (39) | 3182 | 63.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 245 | 4.9% |
|
| 1.0 | 169 | 3.4% |
|
| 2.0 | 196 | 3.9% |
|
| 3.0 | 172 | 3.4% |
|
| 4.0 | 195 | 3.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 44.0 | 27 | 0.5% |
|
| 45.0 | 23 | 0.5% |
|
| 46.0 | 17 | 0.3% |
|
| 47.0 | 18 | 0.4% |
|
| 48.0 | 70 | 1.4% |
|
addresscat
Highly correlated
This variable is highly correlated with address and should be ignored for analysis
| Correlation | 0.92717 |
|---|
age
Numeric
| Distinct count | 62 |
|---|---|
| Unique (%) | 1.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 47.026 |
|---|---|
| Minimum | 18 |
| Maximum | 79 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 18 |
|---|---|
| 5-th percentile | 20 |
| Q1 | 31 |
| Median | 47 |
| Q3 | 62 |
| 95-th percentile | 76 |
| Maximum | 79 |
| Range | 61 |
| Interquartile range | 31 |
Descriptive statistics
| Standard deviation | 17.77 |
|---|---|
| Coef of variation | 0.37789 |
| Kurtosis | -1.187 |
| Mean | 47.026 |
| MAD | 15.403 |
| Skewness | 0.09076 |
| Sum | 235130 |
| Variance | 315.78 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 18.0 | 106 | 2.1% |
|
| 35.0 | 102 | 2.0% |
|
| 37.0 | 98 | 2.0% |
|
| 24.0 | 97 | 1.9% |
|
| 63.0 | 95 | 1.9% |
|
| 21.0 | 95 | 1.9% |
|
| 31.0 | 94 | 1.9% |
|
| 25.0 | 93 | 1.9% |
|
| 57.0 | 93 | 1.9% |
|
| 36.0 | 92 | 1.8% |
|
| Other values (52) | 4035 | 80.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 18.0 | 106 | 2.1% |
|
| 19.0 | 78 | 1.6% |
|
| 20.0 | 80 | 1.6% |
|
| 21.0 | 95 | 1.9% |
|
| 22.0 | 82 | 1.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 75.0 | 74 | 1.5% |
|
| 76.0 | 58 | 1.2% |
|
| 77.0 | 71 | 1.4% |
|
| 78.0 | 70 | 1.4% |
|
| 79.0 | 73 | 1.5% |
|
agecat
Highly correlated
This variable is highly correlated with age and should be ignored for analysis
| Correlation | 0.96988 |
|---|
bfast
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.0586 |
|---|---|
| Minimum | 1 |
| Maximum | 3 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 2 |
| Q3 | 3 |
| 95-th percentile | 3 |
| Maximum | 3 |
| Range | 2 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 0.82952 |
|---|---|
| Coef of variation | 0.40295 |
| Kurtosis | -1.5385 |
| Mean | 2.0586 |
| MAD | 0.70605 |
| Skewness | -0.10964 |
| Sum | 10293 |
| Variance | 0.6881 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 3.0 | 1875 | 37.5% |
|
| 1.0 | 1582 | 31.6% |
|
| 2.0 | 1543 | 30.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1582 | 31.6% |
|
| 2.0 | 1543 | 30.9% |
|
| 3.0 | 1875 | 37.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1582 | 31.6% |
|
| 2.0 | 1543 | 30.9% |
|
| 3.0 | 1875 | 37.5% |
|
callcard
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.7162 |
|---|
| 1.0 |
3581
|
|---|---|
| 0.0 |
1419
|
| Value | Count | Frequency (%) | |
| 1.0 | 3581 | 71.6% |
|
| 0.0 | 1419 | 28.4% |
|
callid
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4752 |
|---|
| 0.0 |
2624
|
|---|---|
| 1.0 |
2376
|
| Value | Count | Frequency (%) | |
| 0.0 | 2624 | 52.5% |
|
| 1.0 | 2376 | 47.5% |
|
callwait
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.479 |
|---|
| 0.0 |
2605
|
|---|---|
| 1.0 |
2395
|
| Value | Count | Frequency (%) | |
| 0.0 | 2605 | 52.1% |
|
| 1.0 | 2395 | 47.9% |
|
carbought
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.221 |
|---|---|
| Minimum | -1 |
| Maximum | 1 |
| Zeros (%) | 58.0% |
Quantile statistics
| Minimum | -1 |
|---|---|
| 5-th percentile | -1 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 1 |
| 95-th percentile | 1 |
| Maximum | 1 |
| Range | 2 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.60912 |
|---|---|
| Coef of variation | 2.7562 |
| Kurtosis | -0.5264 |
| Mean | 0.221 |
| MAD | 0.49918 |
| Skewness | -0.15823 |
| Sum | 1105 |
| Variance | 0.37103 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 2901 | 58.0% |
|
| 1.0 | 1602 | 32.0% |
|
| -1.0 | 497 | 9.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 2901 | 58.0% |
|
| 1.0 | 1602 | 32.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 2901 | 58.0% |
|
| 1.0 | 1602 | 32.0% |
|
carbuy
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.361 |
|---|
| 0.0 |
3195
|
|---|---|
| 1.0 |
1805
|
| Value | Count | Frequency (%) | |
| 0.0 | 3195 | 63.9% |
|
| 1.0 | 1805 | 36.1% |
|
carcatvalue
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.3894 |
|---|---|
| Minimum | -1 |
| Maximum | 3 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -1 |
|---|---|
| 5-th percentile | -1 |
| Q1 | 1 |
| Median | 1 |
| Q3 | 2 |
| 95-th percentile | 3 |
| Maximum | 3 |
| Range | 4 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 1.0813 |
|---|---|
| Coef of variation | 0.77825 |
| Kurtosis | 0.23064 |
| Mean | 1.3894 |
| MAD | 0.84868 |
| Skewness | -0.49643 |
| Sum | 6947 |
| Variance | 1.1692 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 2399 | 48.0% |
|
| 2.0 | 1267 | 25.3% |
|
| 3.0 | 837 | 16.7% |
|
| -1.0 | 497 | 9.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 1.0 | 2399 | 48.0% |
|
| 2.0 | 1267 | 25.3% |
|
| 3.0 | 837 | 16.7% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 1.0 | 2399 | 48.0% |
|
| 2.0 | 1267 | 25.3% |
|
| 3.0 | 837 | 16.7% |
|
card
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.7142 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 4 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.1849 |
|---|---|
| Coef of variation | 0.43656 |
| Kurtosis | -1.1112 |
| Mean | 2.7142 |
| MAD | 1.0323 |
| Skewness | 0.015333 |
| Sum | 13571 |
| Variance | 1.404 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 4.0 | 1344 | 26.9% |
|
| 2.0 | 1247 | 24.9% |
|
| 3.0 | 1200 | 24.0% |
|
| 1.0 | 986 | 19.7% |
|
| 5.0 | 223 | 4.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 986 | 19.7% |
|
| 2.0 | 1247 | 24.9% |
|
| 3.0 | 1200 | 24.0% |
|
| 4.0 | 1344 | 26.9% |
|
| 5.0 | 223 | 4.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 986 | 19.7% |
|
| 2.0 | 1247 | 24.9% |
|
| 3.0 | 1200 | 24.0% |
|
| 4.0 | 1344 | 26.9% |
|
| 5.0 | 223 | 4.5% |
|
card2
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.7744 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 5 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.1734 |
|---|---|
| Coef of variation | 0.42296 |
| Kurtosis | -0.91791 |
| Mean | 2.7744 |
| MAD | 0.99139 |
| Skewness | 0.084736 |
| Sum | 13872 |
| Variance | 1.377 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 3.0 | 1384 | 27.7% |
|
| 2.0 | 1301 | 26.0% |
|
| 4.0 | 1141 | 22.8% |
|
| 1.0 | 829 | 16.6% |
|
| 5.0 | 345 | 6.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 829 | 16.6% |
|
| 2.0 | 1301 | 26.0% |
|
| 3.0 | 1384 | 27.7% |
|
| 4.0 | 1141 | 22.8% |
|
| 5.0 | 345 | 6.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 829 | 16.6% |
|
| 2.0 | 1301 | 26.0% |
|
| 3.0 | 1384 | 27.7% |
|
| 4.0 | 1141 | 22.8% |
|
| 5.0 | 345 | 6.9% |
|
card2tenurecat
Highly correlated
This variable is highly correlated with cardtenurecat and should be ignored for analysis
| Correlation | 0.92431 |
|---|
card2type
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.5412 |
|---|---|
| Minimum | 1 |
| Maximum | 4 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 4 |
| Maximum | 4 |
| Range | 3 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.1188 |
|---|---|
| Coef of variation | 0.44027 |
| Kurtosis | -1.3601 |
| Mean | 2.5412 |
| MAD | 1.0003 |
| Skewness | -0.04748 |
| Sum | 12706 |
| Variance | 1.2518 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 4.0 | 1319 | 26.4% |
|
| 3.0 | 1257 | 25.1% |
|
| 2.0 | 1235 | 24.7% |
|
| 1.0 | 1189 | 23.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1189 | 23.8% |
|
| 2.0 | 1235 | 24.7% |
|
| 3.0 | 1257 | 25.1% |
|
| 4.0 | 1319 | 26.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1189 | 23.8% |
|
| 2.0 | 1235 | 24.7% |
|
| 3.0 | 1257 | 25.1% |
|
| 4.0 | 1319 | 26.4% |
|
cardmon
Numeric
| Distinct count | 229 |
|---|---|
| Unique (%) | 4.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.267 |
|---|---|
| Minimum | 0 |
| Maximum | 64.25 |
| Zeros (%) | 28.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 13.75 |
| Q3 | 22.75 |
| 95-th percentile | 42 |
| Maximum | 64.25 |
| Range | 64.25 |
| Interquartile range | 22.75 |
Descriptive statistics
| Standard deviation | 14.156 |
|---|---|
| Coef of variation | 0.92725 |
| Kurtosis | 1.022 |
| Mean | 15.267 |
| MAD | 11.048 |
| Skewness | 1.0164 |
| Sum | 76335 |
| Variance | 200.4 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 1419 | 28.4% |
|
| 13.25 | 53 | 1.1% |
|
| 11.5 | 52 | 1.0% |
|
| 64.25 | 51 | 1.0% |
|
| 16.25 | 49 | 1.0% |
|
| 16.5 | 49 | 1.0% |
|
| 13.75 | 47 | 0.9% |
|
| 18.25 | 45 | 0.9% |
|
| 13.5 | 45 | 0.9% |
|
| 15.0 | 44 | 0.9% |
|
| Other values (219) | 3146 | 62.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 1419 | 28.4% |
|
| 3.25 | 1 | 0.0% |
|
| 3.75 | 1 | 0.0% |
|
| 4.0 | 3 | 0.1% |
|
| 4.25 | 9 | 0.2% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 63.25 | 1 | 0.0% |
|
| 63.5 | 1 | 0.0% |
|
| 63.75 | 1 | 0.0% |
|
| 64.0 | 2 | 0.0% |
|
| 64.25 | 51 | 1.0% |
|
cardten
Numeric
| Distinct count | 651 |
|---|---|
| Unique (%) | 13.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 707.24 |
|---|---|
| Minimum | 0 |
| Maximum | 4010.4 |
| Zeros (%) | 28.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 425 |
| Q3 | 1080 |
| 95-th percentile | 2455.3 |
| Maximum | 4010.4 |
| Range | 4010.4 |
| Interquartile range | 1080 |
Descriptive statistics
| Standard deviation | 848.14 |
|---|---|
| Coef of variation | 1.1992 |
| Kurtosis | 2.6283 |
| Mean | 707.24 |
| MAD | 650.61 |
| Skewness | 1.6086 |
| Sum | 3536200 |
| Variance | 719340 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 1420 | 28.4% |
|
| 4010.4000000000087 | 50 | 1.0% |
|
| 590.0 | 21 | 0.4% |
|
| 200.0 | 20 | 0.4% |
|
| 380.0 | 20 | 0.4% |
|
| 195.0 | 19 | 0.4% |
|
| 45.0 | 19 | 0.4% |
|
| 500.0 | 19 | 0.4% |
|
| 220.0 | 18 | 0.4% |
|
| 330.0 | 18 | 0.4% |
|
| Other values (641) | 3376 | 67.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 1420 | 28.4% |
|
| 4.75 | 1 | 0.0% |
|
| 5.0 | 17 | 0.3% |
|
| 5.25 | 1 | 0.0% |
|
| 7.75 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3960.0 | 1 | 0.0% |
|
| 3980.0 | 1 | 0.0% |
|
| 4000.0 | 1 | 0.0% |
|
| 4010.0 | 1 | 0.0% |
|
| 4010.4000000000087 | 50 | 1.0% |
|
cardtenurecat
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.7822 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 3 |
| Median | 4 |
| Q3 | 5 |
| 95-th percentile | 5 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.3538 |
|---|---|
| Coef of variation | 0.35794 |
| Kurtosis | -1.0266 |
| Mean | 3.7822 |
| MAD | 1.2057 |
| Skewness | -0.62824 |
| Sum | 18911 |
| Variance | 1.8327 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 5.0 | 2351 | 47.0% |
|
| 2.0 | 847 | 16.9% |
|
| 3.0 | 789 | 15.8% |
|
| 4.0 | 694 | 13.9% |
|
| 1.0 | 319 | 6.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 319 | 6.4% |
|
| 2.0 | 847 | 16.9% |
|
| 3.0 | 789 | 15.8% |
|
| 4.0 | 694 | 13.9% |
|
| 5.0 | 2351 | 47.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 319 | 6.4% |
|
| 2.0 | 847 | 16.9% |
|
| 3.0 | 789 | 15.8% |
|
| 4.0 | 694 | 13.9% |
|
| 5.0 | 2351 | 47.0% |
|
cardtype
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.507 |
|---|---|
| Minimum | 1 |
| Maximum | 4 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 4 |
| Maximum | 4 |
| Range | 3 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.1185 |
|---|---|
| Coef of variation | 0.44614 |
| Kurtosis | -1.3608 |
| Mean | 2.507 |
| MAD | 1.0004 |
| Skewness | -0.0098086 |
| Sum | 12535 |
| Variance | 1.251 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 4.0 | 1260 | 25.2% |
|
| 3.0 | 1257 | 25.1% |
|
| 1.0 | 1242 | 24.8% |
|
| 2.0 | 1241 | 24.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1242 | 24.8% |
|
| 2.0 | 1241 | 24.8% |
|
| 3.0 | 1257 | 25.1% |
|
| 4.0 | 1260 | 25.2% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1242 | 24.8% |
|
| 2.0 | 1241 | 24.8% |
|
| 3.0 | 1257 | 25.1% |
|
| 4.0 | 1260 | 25.2% |
|
carown
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.6414 |
|---|---|
| Minimum | -1 |
| Maximum | 1 |
| Zeros (%) | 16.0% |
Quantile statistics
| Minimum | -1 |
|---|---|
| 5-th percentile | -1 |
| Q1 | 0 |
| Median | 1 |
| Q3 | 1 |
| 95-th percentile | 1 |
| Maximum | 1 |
| Range | 2 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.6549 |
|---|---|
| Coef of variation | 1.021 |
| Kurtosis | 1.14 |
| Mean | 0.6414 |
| MAD | 0.5313 |
| Skewness | -1.5944 |
| Sum | 3207 |
| Variance | 0.42889 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 3704 | 74.1% |
|
| 0.0 | 799 | 16.0% |
|
| -1.0 | 497 | 9.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 799 | 16.0% |
|
| 1.0 | 3704 | 74.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 799 | 16.0% |
|
| 1.0 | 3704 | 74.1% |
|
cars
Numeric
| Distinct count | 7 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.1276 |
|---|---|
| Minimum | 0 |
| Maximum | 6 |
| Zeros (%) | 9.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 1 |
| Median | 2 |
| Q3 | 3 |
| 95-th percentile | 4 |
| Maximum | 6 |
| Range | 6 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.2972 |
|---|---|
| Coef of variation | 0.60972 |
| Kurtosis | 0.059786 |
| Mean | 2.1276 |
| MAD | 1.0097 |
| Skewness | 0.43884 |
| Sum | 10638 |
| Variance | 1.6829 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 2.0 | 1607 | 32.1% |
|
| 1.0 | 1119 | 22.4% |
|
| 3.0 | 1082 | 21.6% |
|
| 0.0 | 497 | 9.9% |
|
| 4.0 | 481 | 9.6% |
|
| 5.0 | 149 | 3.0% |
|
| 6.0 | 65 | 1.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 497 | 9.9% |
|
| 1.0 | 1119 | 22.4% |
|
| 2.0 | 1607 | 32.1% |
|
| 3.0 | 1082 | 21.6% |
|
| 4.0 | 481 | 9.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 1607 | 32.1% |
|
| 3.0 | 1082 | 21.6% |
|
| 4.0 | 481 | 9.6% |
|
| 5.0 | 149 | 3.0% |
|
| 6.0 | 65 | 1.3% |
|
cartype
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.3438 |
|---|---|
| Minimum | -1 |
| Maximum | 1 |
| Zeros (%) | 45.7% |
Quantile statistics
| Minimum | -1 |
|---|---|
| 5-th percentile | -1 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 1 |
| 95-th percentile | 1 |
| Maximum | 1 |
| Range | 2 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.65153 |
|---|---|
| Coef of variation | 1.8951 |
| Kurtosis | -0.70821 |
| Mean | 0.3438 |
| MAD | 0.58166 |
| Skewness | -0.48685 |
| Sum | 1719 |
| Variance | 0.42449 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 2287 | 45.7% |
|
| 1.0 | 2216 | 44.3% |
|
| -1.0 | 497 | 9.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 2287 | 45.7% |
|
| 1.0 | 2216 | 44.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 0.0 | 2287 | 45.7% |
|
| 1.0 | 2216 | 44.3% |
|
carvalue
Numeric
| Distinct count | 767 |
|---|---|
| Unique (%) | 15.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 23.202 |
|---|---|
| Minimum | -1 |
| Maximum | 92.001 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -1 |
|---|---|
| 5-th percentile | -1 |
| Q1 | 9.2 |
| Median | 17 |
| Q3 | 31.1 |
| 95-th percentile | 72 |
| Maximum | 92.001 |
| Range | 93.001 |
| Interquartile range | 21.9 |
Descriptive statistics
| Standard deviation | 21.13 |
|---|---|
| Coef of variation | 0.91068 |
| Kurtosis | 1.8374 |
| Mean | 23.202 |
| MAD | 15.866 |
| Skewness | 1.451 |
| Sum | 116010 |
| Variance | 446.47 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 92.00100000000002 | 50 | 1.0% |
|
| 9.8 | 25 | 0.5% |
|
| 13.5 | 24 | 0.5% |
|
| 6.300000000000001 | 24 | 0.5% |
|
| 10.200000000000001 | 23 | 0.5% |
|
| 13.0 | 23 | 0.5% |
|
| 15.8 | 22 | 0.4% |
|
| 9.200000000000001 | 22 | 0.4% |
|
| 9.1 | 22 | 0.4% |
|
| Other values (757) | 4268 | 85.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -1.0 | 497 | 9.9% |
|
| 2.2 | 1 | 0.0% |
|
| 2.3000000000000003 | 1 | 0.0% |
|
| 2.4000000000000004 | 1 | 0.0% |
|
| 2.5 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 91.7 | 1 | 0.0% |
|
| 91.80000000000001 | 1 | 0.0% |
|
| 91.9 | 1 | 0.0% |
|
| 92.0 | 1 | 0.0% |
|
| 92.00100000000002 | 50 | 1.0% |
|
churn
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2532 |
|---|
| 0.0 |
3734
|
|---|---|
| 1.0 |
1266
|
| Value | Count | Frequency (%) | |
| 0.0 | 3734 | 74.7% |
|
| 1.0 | 1266 | 25.3% |
|
commute
Numeric
| Distinct count | 10 |
|---|---|
| Unique (%) | 0.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.9962 |
|---|---|
| Minimum | 1 |
| Maximum | 10 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 1 |
| Q3 | 4 |
| 95-th percentile | 8 |
| Maximum | 10 |
| Range | 9 |
| Interquartile range | 3 |
Descriptive statistics
| Standard deviation | 2.7435 |
|---|---|
| Coef of variation | 0.91567 |
| Kurtosis | -0.045572 |
| Mean | 2.9962 |
| MAD | 2.2996 |
| Skewness | 1.1277 |
| Sum | 14981 |
| Variance | 7.5269 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 2855 | 57.1% |
|
| 4.0 | 635 | 12.7% |
|
| 8.0 | 585 | 11.7% |
|
| 5.0 | 302 | 6.0% |
|
| 3.0 | 295 | 5.9% |
|
| 10.0 | 153 | 3.1% |
|
| 7.0 | 56 | 1.1% |
|
| 2.0 | 50 | 1.0% |
|
| 6.0 | 44 | 0.9% |
|
| 9.0 | 25 | 0.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 2855 | 57.1% |
|
| 2.0 | 50 | 1.0% |
|
| 3.0 | 295 | 5.9% |
|
| 4.0 | 635 | 12.7% |
|
| 5.0 | 302 | 6.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 6.0 | 44 | 0.9% |
|
| 7.0 | 56 | 1.1% |
|
| 8.0 | 585 | 11.7% |
|
| 9.0 | 25 | 0.5% |
|
| 10.0 | 153 | 3.1% |
|
commutebike
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1234 |
|---|
| 0.0 |
4383
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4383 | 87.7% |
|
| 1.0 | 617 | 12.3% |
|
commutebus
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.406 |
|---|
| 0.0 |
2970
|
|---|---|
| 1.0 |
2030
|
| Value | Count | Frequency (%) | |
| 0.0 | 2970 | 59.4% |
|
| 1.0 | 2030 | 40.6% |
|
commutecar
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.679 |
|---|
| 1.0 |
3395
|
|---|---|
| 0.0 |
1605
|
| Value | Count | Frequency (%) | |
| 1.0 | 3395 | 67.9% |
|
| 0.0 | 1605 | 32.1% |
|
commutecarpool
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2718 |
|---|
| 0.0 |
3641
|
|---|---|
| 1.0 |
1359
|
| Value | Count | Frequency (%) | |
| 0.0 | 3641 | 72.8% |
|
| 1.0 | 1359 | 27.2% |
|
commutecat
Highly correlated
This variable is highly correlated with commute and should be ignored for analysis
| Correlation | 0.98117 |
|---|
commutemotorcycle
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1026 |
|---|
| 0.0 |
4487
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4487 | 89.7% |
|
| 1.0 | 513 | 10.3% |
|
commutenonmotor
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.0584 |
|---|
| 0.0 |
4708
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4708 | 94.2% |
|
| 1.0 | 292 | 5.8% |
|
commutepublic
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.0954 |
|---|
| 0.0 |
4523
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4523 | 90.5% |
|
| 1.0 | 477 | 9.5% |
|
commuterail
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2746 |
|---|
| 0.0 |
3627
|
|---|---|
| 1.0 |
1373
|
| Value | Count | Frequency (%) | |
| 0.0 | 3627 | 72.5% |
|
| 1.0 | 1373 | 27.5% |
|
commutetime
Numeric
| Distinct count | 30 |
|---|---|
| Unique (%) | 0.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 25.332 |
|---|---|
| Minimum | 13 |
| Maximum | 40.01 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 13 |
|---|---|
| 5-th percentile | 16 |
| Q1 | 21 |
| Median | 25 |
| Q3 | 29 |
| 95-th percentile | 35 |
| Maximum | 40.01 |
| Range | 27.01 |
| Interquartile range | 8 |
Descriptive statistics
| Standard deviation | 5.7542 |
|---|---|
| Coef of variation | 0.22715 |
| Kurtosis | -0.26483 |
| Mean | 25.332 |
| MAD | 4.6409 |
| Skewness | 0.23159 |
| Sum | 126660 |
| Variance | 33.111 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 24.0 | 336 | 6.7% |
|
| 23.0 | 335 | 6.7% |
|
| 27.0 | 331 | 6.6% |
|
| 25.0 | 330 | 6.6% |
|
| 22.0 | 325 | 6.5% |
|
| 26.0 | 311 | 6.2% |
|
| 21.0 | 307 | 6.1% |
|
| 28.0 | 293 | 5.9% |
|
| 29.0 | 260 | 5.2% |
|
| 30.0 | 226 | 4.5% |
|
| Other values (20) | 1946 | 38.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 13.0 | 72 | 1.4% |
|
| 14.0 | 33 | 0.7% |
|
| 15.0 | 84 | 1.7% |
|
| 16.0 | 98 | 2.0% |
|
| 17.0 | 130 | 2.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 37.0 | 39 | 0.8% |
|
| 38.0 | 42 | 0.8% |
|
| 39.0 | 31 | 0.6% |
|
| 40.0 | 17 | 0.3% |
|
| 40.01000000000022 | 50 | 1.0% |
|
commutewalk
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.3838 |
|---|
| 0.0 |
3081
|
|---|---|
| 1.0 |
1919
|
| Value | Count | Frequency (%) | |
| 0.0 | 3081 | 61.6% |
|
| 1.0 | 1919 | 38.4% |
|
confer
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.478 |
|---|
| 0.0 |
2610
|
|---|---|
| 1.0 |
2390
|
| Value | Count | Frequency (%) | |
| 0.0 | 2610 | 52.2% |
|
| 1.0 | 2390 | 47.8% |
|
creddebt
Numeric
| Distinct count | 4852 |
|---|---|
| Unique (%) | 97.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.7584 |
|---|---|
| Minimum | 0.03316 |
| Maximum | 14.28 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.03316 |
|---|---|
| 5-th percentile | 0.10109 |
| Q1 | 0.38552 |
| Median | 0.92644 |
| Q3 | 2.0638 |
| 95-th percentile | 6.373 |
| Maximum | 14.28 |
| Range | 14.247 |
| Interquartile range | 1.6783 |
Descriptive statistics
| Standard deviation | 2.3807 |
|---|---|
| Coef of variation | 1.3539 |
| Kurtosis | 10.45 |
| Mean | 1.7584 |
| MAD | 1.5277 |
| Skewness | 2.9733 |
| Sum | 8791.8 |
| Variance | 5.6676 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 14.280358400000008 | 50 | 1.0% |
|
| 0.03316008 | 50 | 1.0% |
|
| 0.23587200000000003 | 2 | 0.0% |
|
| 0.532224 | 2 | 0.0% |
|
| 0.37044 | 2 | 0.0% |
|
| 0.31600799999999996 | 2 | 0.0% |
|
| 0.658368 | 2 | 0.0% |
|
| 1.36125 | 2 | 0.0% |
|
| 0.4984199999999999 | 2 | 0.0% |
|
| 0.129778 | 2 | 0.0% |
|
| Other values (4842) | 4884 | 97.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.03316008 | 50 | 1.0% |
|
| 0.033166 | 1 | 0.0% |
|
| 0.03332000000000001 | 1 | 0.0% |
|
| 0.033408 | 1 | 0.0% |
|
| 0.03417600000000001 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 14.253551999999997 | 1 | 0.0% |
|
| 14.25886 | 1 | 0.0% |
|
| 14.267784 | 1 | 0.0% |
|
| 14.28 | 1 | 0.0% |
|
| 14.280358400000008 | 50 | 1.0% |
|
debtinc
Numeric
| Distinct count | 280 |
|---|---|
| Unique (%) | 5.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 9.9141 |
|---|---|
| Minimum | 0.7 |
| Maximum | 29.2 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.7 |
|---|---|
| 5-th percentile | 1.9 |
| Q1 | 5.1 |
| Median | 8.8 |
| Q3 | 13.6 |
| 95-th percentile | 22.2 |
| Maximum | 29.2 |
| Range | 28.5 |
| Interquartile range | 8.5 |
Descriptive statistics
| Standard deviation | 6.2417 |
|---|---|
| Coef of variation | 0.62958 |
| Kurtosis | 0.44555 |
| Mean | 9.9141 |
| MAD | 4.9702 |
| Skewness | 0.88787 |
| Sum | 49571 |
| Variance | 38.959 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.7000000000000001 | 54 | 1.1% |
|
| 29.2 | 53 | 1.1% |
|
| 7.000000000000001 | 48 | 1.0% |
|
| 4.1000000000000005 | 46 | 0.9% |
|
| 6.9 | 46 | 0.9% |
|
| 5.4 | 45 | 0.9% |
|
| 6.6000000000000005 | 42 | 0.8% |
|
| 4.3999999999999995 | 42 | 0.8% |
|
| 7.3 | 41 | 0.8% |
|
| 11.3 | 39 | 0.8% |
|
| Other values (270) | 4544 | 90.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.7000000000000001 | 54 | 1.1% |
|
| 0.8 | 15 | 0.3% |
|
| 0.8999999999999999 | 11 | 0.2% |
|
| 1.0 | 10 | 0.2% |
|
| 1.0999999999999999 | 18 | 0.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 28.7 | 2 | 0.0% |
|
| 28.799999999999997 | 1 | 0.0% |
|
| 28.9 | 2 | 0.0% |
|
| 28.999999999999996 | 1 | 0.0% |
|
| 29.2 | 53 | 1.1% |
|
default
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2342 |
|---|
| 0.0 |
3829
|
|---|---|
| 1.0 |
1171
|
| Value | Count | Frequency (%) | |
| 0.0 | 3829 | 76.6% |
|
| 1.0 | 1171 | 23.4% |
|
ebill
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.3486 |
|---|
| 0.0 |
3257
|
|---|---|
| 1.0 |
1743
|
| Value | Count | Frequency (%) | |
| 0.0 | 3257 | 65.1% |
|
| 1.0 | 1743 | 34.9% |
|
ed
Numeric
| Distinct count | 14 |
|---|---|
| Unique (%) | 0.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 14.544 |
|---|---|
| Minimum | 8 |
| Maximum | 21 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 8 |
|---|---|
| 5-th percentile | 9 |
| Q1 | 12 |
| Median | 14 |
| Q3 | 17 |
| 95-th percentile | 20 |
| Maximum | 21 |
| Range | 13 |
| Interquartile range | 5 |
Descriptive statistics
| Standard deviation | 3.2426 |
|---|---|
| Coef of variation | 0.22294 |
| Kurtosis | -0.72614 |
| Mean | 14.544 |
| MAD | 2.6898 |
| Skewness | 0.0099514 |
| Sum | 72721 |
| Variance | 10.514 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 14.0 | 569 | 11.4% |
|
| 15.0 | 536 | 10.7% |
|
| 13.0 | 531 | 10.6% |
|
| 16.0 | 486 | 9.7% |
|
| 12.0 | 467 | 9.3% |
|
| 17.0 | 454 | 9.1% |
|
| 11.0 | 362 | 7.2% |
|
| 18.0 | 349 | 7.0% |
|
| 19.0 | 308 | 6.2% |
|
| 10.0 | 260 | 5.2% |
|
| Other values (4) | 678 | 13.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 8.0 | 146 | 2.9% |
|
| 9.0 | 178 | 3.6% |
|
| 10.0 | 260 | 5.2% |
|
| 11.0 | 362 | 7.2% |
|
| 12.0 | 467 | 9.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 17.0 | 454 | 9.1% |
|
| 18.0 | 349 | 7.0% |
|
| 19.0 | 308 | 6.2% |
|
| 20.0 | 206 | 4.1% |
|
| 21.0 | 148 | 3.0% |
|
edcat
Highly correlated
This variable is highly correlated with ed and should be ignored for analysis
| Correlation | 0.96649 |
|---|
empcat
Highly correlated
This variable is highly correlated with employ and should be ignored for analysis
| Correlation | 0.90359 |
|---|
employ
Numeric
| Distinct count | 40 |
|---|---|
| Unique (%) | 0.8% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 9.698 |
|---|---|
| Minimum | 0 |
| Maximum | 39 |
| Zeros (%) | 13.2% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 2 |
| Median | 7 |
| Q3 | 15 |
| 95-th percentile | 31 |
| Maximum | 39 |
| Range | 39 |
| Interquartile range | 13 |
Descriptive statistics
| Standard deviation | 9.5817 |
|---|---|
| Coef of variation | 0.98801 |
| Kurtosis | 0.71875 |
| Mean | 9.698 |
| MAD | 7.6253 |
| Skewness | 1.188 |
| Sum | 48490 |
| Variance | 91.809 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 659 | 13.2% |
|
| 1.0 | 389 | 7.8% |
|
| 2.0 | 318 | 6.4% |
|
| 3.0 | 309 | 6.2% |
|
| 4.0 | 293 | 5.9% |
|
| 5.0 | 260 | 5.2% |
|
| 6.0 | 250 | 5.0% |
|
| 7.0 | 191 | 3.8% |
|
| 8.0 | 187 | 3.7% |
|
| 11.0 | 184 | 3.7% |
|
| Other values (30) | 1960 | 39.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 659 | 13.2% |
|
| 1.0 | 389 | 7.8% |
|
| 2.0 | 318 | 6.4% |
|
| 3.0 | 309 | 6.2% |
|
| 4.0 | 293 | 5.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 35.0 | 22 | 0.4% |
|
| 36.0 | 18 | 0.4% |
|
| 37.0 | 17 | 0.3% |
|
| 38.0 | 18 | 0.4% |
|
| 39.0 | 55 | 1.1% |
|
equip
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.3408 |
|---|
| 0.0 |
3296
|
|---|---|
| 1.0 |
1704
|
| Value | Count | Frequency (%) | |
| 0.0 | 3296 | 65.9% |
|
| 1.0 | 1704 | 34.1% |
|
equipmon
Highly correlated
This variable is highly correlated with equip and should be ignored for analysis
| Correlation | 0.94719 |
|---|
equipten
Numeric
| Distinct count | 1634 |
|---|---|
| Unique (%) | 32.7% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 463.4 |
|---|---|
| Minimum | 0 |
| Maximum | 3679.5 |
| Zeros (%) | 65.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 510.16 |
| 95-th percentile | 2601 |
| Maximum | 3679.5 |
| Range | 3679.5 |
| Interquartile range | 510.16 |
Descriptive statistics
| Standard deviation | 882.83 |
|---|---|
| Coef of variation | 1.9051 |
| Kurtosis | 3.0986 |
| Mean | 463.4 |
| MAD | 654.48 |
| Skewness | 2.0084 |
| Sum | 2317000 |
| Variance | 779390 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3296 | 65.9% |
|
| 3679.4575 | 50 | 1.0% |
|
| 2778.3 | 2 | 0.0% |
|
| 1918.8 | 2 | 0.0% |
|
| 2357.9 | 2 | 0.0% |
|
| 206.7 | 2 | 0.0% |
|
| 163.4 | 2 | 0.0% |
|
| 446.45 | 2 | 0.0% |
|
| 224.7 | 2 | 0.0% |
|
| 101.05 | 2 | 0.0% |
|
| Other values (1624) | 1638 | 32.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3296 | 65.9% |
|
| 12.05 | 1 | 0.0% |
|
| 14.65 | 1 | 0.0% |
|
| 14.85 | 1 | 0.0% |
|
| 16.1 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3672.65 | 1 | 0.0% |
|
| 3675.15 | 1 | 0.0% |
|
| 3676.2 | 1 | 0.0% |
|
| 3679.45 | 1 | 0.0% |
|
| 3679.4575 | 50 | 1.0% |
|
forward
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4806 |
|---|
| 0.0 |
2597
|
|---|---|
| 1.0 |
2403
|
| Value | Count | Frequency (%) | |
| 0.0 | 2597 | 51.9% |
|
| 1.0 | 2403 | 48.1% |
|
gender
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.5036 |
|---|
| 1.0 |
2518
|
|---|---|
| 0.0 |
2482
|
| Value | Count | Frequency (%) | |
| 1.0 | 2518 | 50.4% |
|
| 0.0 | 2482 | 49.6% |
|
homeown
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.6296 |
|---|
| 1.0 |
3148
|
|---|---|
| 0.0 |
1852
|
| Value | Count | Frequency (%) | |
| 1.0 | 3148 | 63.0% |
|
| 0.0 | 1852 | 37.0% |
|
hometype
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.8426 |
|---|---|
| Minimum | 1 |
| Maximum | 4 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 2 |
| Q3 | 2 |
| 95-th percentile | 4 |
| Maximum | 4 |
| Range | 3 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.91673 |
|---|---|
| Coef of variation | 0.49752 |
| Kurtosis | -0.43415 |
| Mean | 1.8426 |
| MAD | 0.7634 |
| Skewness | 0.76947 |
| Sum | 9213 |
| Variance | 0.84039 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 2265 | 45.3% |
|
| 2.0 | 1548 | 31.0% |
|
| 3.0 | 896 | 17.9% |
|
| 4.0 | 291 | 5.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 2265 | 45.3% |
|
| 2.0 | 1548 | 31.0% |
|
| 3.0 | 896 | 17.9% |
|
| 4.0 | 291 | 5.8% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 2265 | 45.3% |
|
| 2.0 | 1548 | 31.0% |
|
| 3.0 | 896 | 17.9% |
|
| 4.0 | 291 | 5.8% |
|
hourstv
Numeric
| Distinct count | 27 |
|---|---|
| Unique (%) | 0.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 19.627 |
|---|---|
| Minimum | 0 |
| Maximum | 31 |
| Zeros (%) | 1.7% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 12 |
| Q1 | 17 |
| Median | 20 |
| Q3 | 23 |
| 95-th percentile | 28 |
| Maximum | 31 |
| Range | 31 |
| Interquartile range | 6 |
Descriptive statistics
| Standard deviation | 5.1192 |
|---|---|
| Coef of variation | 0.26083 |
| Kurtosis | 2.2956 |
| Mean | 19.627 |
| MAD | 3.8446 |
| Skewness | -0.72124 |
| Sum | 98133 |
| Variance | 26.206 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 20.0 | 451 | 9.0% |
|
| 19.0 | 445 | 8.9% |
|
| 21.0 | 440 | 8.8% |
|
| 18.0 | 413 | 8.3% |
|
| 22.0 | 371 | 7.4% |
|
| 17.0 | 350 | 7.0% |
|
| 16.0 | 309 | 6.2% |
|
| 23.0 | 301 | 6.0% |
|
| 15.0 | 263 | 5.3% |
|
| 24.0 | 248 | 5.0% |
|
| Other values (17) | 1409 | 28.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 85 | 1.7% |
|
| 6.0 | 1 | 0.0% |
|
| 7.0 | 3 | 0.1% |
|
| 8.0 | 9 | 0.2% |
|
| 9.0 | 13 | 0.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 27.0 | 127 | 2.5% |
|
| 28.0 | 89 | 1.8% |
|
| 29.0 | 73 | 1.5% |
|
| 30.0 | 42 | 0.8% |
|
| 31.0 | 66 | 1.3% |
|
inccat
Highly correlated
This variable is highly correlated with lninc and should be ignored for analysis
| Correlation | 0.95154 |
|---|
income
Numeric
| Distinct count | 223 |
|---|---|
| Unique (%) | 4.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 53.63 |
|---|---|
| Minimum | 9 |
| Maximum | 272.01 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 9 |
|---|---|
| 5-th percentile | 13 |
| Q1 | 24 |
| Median | 38 |
| Q3 | 67 |
| 95-th percentile | 147 |
| Maximum | 272.01 |
| Range | 263.01 |
| Interquartile range | 43 |
Descriptive statistics
| Standard deviation | 46.568 |
|---|---|
| Coef of variation | 0.86832 |
| Kurtosis | 6.1931 |
| Mean | 53.63 |
| MAD | 32.56 |
| Skewness | 2.2674 |
| Sum | 268150 |
| Variance | 2168.6 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 22.0 | 112 | 2.2% |
|
| 29.0 | 109 | 2.2% |
|
| 25.0 | 108 | 2.2% |
|
| 20.0 | 102 | 2.0% |
|
| 30.0 | 102 | 2.0% |
|
| 18.0 | 100 | 2.0% |
|
| 23.0 | 100 | 2.0% |
|
| 24.0 | 99 | 2.0% |
|
| 32.0 | 93 | 1.9% |
|
| 21.0 | 91 | 1.8% |
|
| Other values (213) | 3984 | 79.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 9.0 | 83 | 1.7% |
|
| 10.0 | 55 | 1.1% |
|
| 11.0 | 57 | 1.1% |
|
| 12.0 | 52 | 1.0% |
|
| 13.0 | 56 | 1.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 257.0 | 2 | 0.0% |
|
| 259.0 | 2 | 0.0% |
|
| 261.0 | 3 | 0.1% |
|
| 272.0 | 1 | 0.0% |
|
| 272.0100000000002 | 50 | 1.0% |
|
internet
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.1996 |
|---|---|
| Minimum | 0 |
| Maximum | 4 |
| Zeros (%) | 50.0% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 1 |
| Q3 | 2 |
| 95-th percentile | 4 |
| Maximum | 4 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.4493 |
|---|---|
| Coef of variation | 1.2082 |
| Kurtosis | -0.83856 |
| Mean | 1.1996 |
| MAD | 1.2604 |
| Skewness | 0.80841 |
| Sum | 5998 |
| Variance | 2.1006 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 2498 | 50.0% |
|
| 1.0 | 774 | 15.5% |
|
| 3.0 | 598 | 12.0% |
|
| 4.0 | 585 | 11.7% |
|
| 2.0 | 545 | 10.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 2498 | 50.0% |
|
| 1.0 | 774 | 15.5% |
|
| 2.0 | 545 | 10.9% |
|
| 3.0 | 598 | 12.0% |
|
| 4.0 | 585 | 11.7% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 2498 | 50.0% |
|
| 1.0 | 774 | 15.5% |
|
| 2.0 | 545 | 10.9% |
|
| 3.0 | 598 | 12.0% |
|
| 4.0 | 585 | 11.7% |
|
jobcat
Numeric
| Distinct count | 6 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.7528 |
|---|---|
| Minimum | 1 |
| Maximum | 6 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 2 |
| Q3 | 4 |
| 95-th percentile | 6 |
| Maximum | 6 |
| Range | 5 |
| Interquartile range | 3 |
Descriptive statistics
| Standard deviation | 1.7379 |
|---|---|
| Coef of variation | 0.63132 |
| Kurtosis | -0.75877 |
| Mean | 2.7528 |
| MAD | 1.467 |
| Skewness | 0.79807 |
| Sum | 13764 |
| Variance | 3.0203 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 2.0 | 1640 | 32.8% |
|
| 1.0 | 1388 | 27.8% |
|
| 6.0 | 688 | 13.8% |
|
| 3.0 | 620 | 12.4% |
|
| 5.0 | 452 | 9.0% |
|
| 4.0 | 212 | 4.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1388 | 27.8% |
|
| 2.0 | 1640 | 32.8% |
|
| 3.0 | 620 | 12.4% |
|
| 4.0 | 212 | 4.2% |
|
| 5.0 | 452 | 9.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 1640 | 32.8% |
|
| 3.0 | 620 | 12.4% |
|
| 4.0 | 212 | 4.2% |
|
| 5.0 | 452 | 9.0% |
|
| 6.0 | 688 | 13.8% |
|
jobsat
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.9642 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 5 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.3795 |
|---|---|
| Coef of variation | 0.46537 |
| Kurtosis | -1.2367 |
| Mean | 2.9642 |
| MAD | 1.1637 |
| Skewness | 0.02675 |
| Sum | 14821 |
| Variance | 1.9029 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 3.0 | 1085 | 21.7% |
|
| 2.0 | 1031 | 20.6% |
|
| 4.0 | 1016 | 20.3% |
|
| 1.0 | 975 | 19.5% |
|
| 5.0 | 893 | 17.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 975 | 19.5% |
|
| 2.0 | 1031 | 20.6% |
|
| 3.0 | 1085 | 21.7% |
|
| 4.0 | 1016 | 20.3% |
|
| 5.0 | 893 | 17.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 975 | 19.5% |
|
| 2.0 | 1031 | 20.6% |
|
| 3.0 | 1085 | 21.7% |
|
| 4.0 | 1016 | 20.3% |
|
| 5.0 | 893 | 17.9% |
|
lncardmon
Highly correlated
This variable is highly correlated with callcard and should be ignored for analysis
| Correlation | 0.94918 |
|---|
lncardten
Highly correlated
This variable is highly correlated with lncardmon and should be ignored for analysis
| Correlation | 0.96006 |
|---|
lncreddebt
Highly correlated
This variable is highly correlated with creddebt and should be ignored for analysis
| Correlation | 0.92026 |
|---|
lnequipmon
Highly correlated
This variable is highly correlated with equipmon and should be ignored for analysis
| Correlation | 0.97195 |
|---|
lnequipten
Highly correlated
This variable is highly correlated with lnequipmon and should be ignored for analysis
| Correlation | 0.98331 |
|---|
lninc
Numeric
| Distinct count | 223 |
|---|---|
| Unique (%) | 4.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.697 |
|---|---|
| Minimum | 2.1972 |
| Maximum | 5.6058 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2.1972 |
|---|---|
| 5-th percentile | 2.5649 |
| Q1 | 3.1781 |
| Median | 3.6376 |
| Q3 | 4.2047 |
| 95-th percentile | 4.9904 |
| Maximum | 5.6058 |
| Range | 3.4086 |
| Interquartile range | 1.0266 |
Descriptive statistics
| Standard deviation | 0.7385 |
|---|---|
| Coef of variation | 0.19975 |
| Kurtosis | -0.32905 |
| Mean | 3.697 |
| MAD | 0.59837 |
| Skewness | 0.26668 |
| Sum | 18485 |
| Variance | 0.54538 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 3.091042453358316 | 112 | 2.2% |
|
| 3.367295829986474 | 109 | 2.2% |
|
| 3.2188758248682006 | 108 | 2.2% |
|
| 2.995732273553991 | 102 | 2.0% |
|
| 3.4011973816621555 | 102 | 2.0% |
|
| 3.1354942159291497 | 100 | 2.0% |
|
| 2.8903717578961645 | 100 | 2.0% |
|
| 3.1780538303479458 | 99 | 2.0% |
|
| 3.4657359027997265 | 93 | 1.9% |
|
| 2.772588722239781 | 91 | 1.8% |
|
| Other values (213) | 3984 | 79.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.1972245773362196 | 83 | 1.7% |
|
| 2.302585092994046 | 55 | 1.1% |
|
| 2.3978952727983707 | 57 | 1.1% |
|
| 2.4849066497880004 | 52 | 1.0% |
|
| 2.5649493574615367 | 56 | 1.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 5.54907608489522 | 2 | 0.0% |
|
| 5.556828061699537 | 2 | 0.0% |
|
| 5.564520407322694 | 3 | 0.1% |
|
| 5.605802066295998 | 1 | 0.0% |
|
| 5.605838763584888 | 50 | 1.0% |
|
lnlongmon
Numeric
| Distinct count | 800 |
|---|---|
| Unique (%) | 16.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.2886 |
|---|---|
| Minimum | 0.61519 |
| Maximum | 4.1775 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.61519 |
|---|---|
| 5-th percentile | 1.0647 |
| Q1 | 1.7405 |
| Median | 2.2565 |
| Q3 | 2.8064 |
| 95-th percentile | 3.6043 |
| Maximum | 4.1775 |
| Range | 3.5623 |
| Interquartile range | 1.0659 |
Descriptive statistics
| Standard deviation | 0.76286 |
|---|---|
| Coef of variation | 0.33333 |
| Kurtosis | -0.35393 |
| Mean | 2.2886 |
| MAD | 0.61708 |
| Skewness | 0.16399 |
| Sum | 11443 |
| Variance | 0.58196 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.6151856390902335 | 51 | 1.0% |
|
| 4.1774747946061055 | 50 | 1.0% |
|
| 1.4350845252893227 | 31 | 0.6% |
|
| 1.6094379124341003 | 29 | 0.6% |
|
| 2.066862759472976 | 28 | 0.6% |
|
| 1.7316555451583497 | 25 | 0.5% |
|
| 2.0149030205422647 | 25 | 0.5% |
|
| 1.6389967146756448 | 24 | 0.5% |
|
| 1.3737155789130306 | 24 | 0.5% |
|
| 1.55814461804655 | 24 | 0.5% |
|
| Other values (790) | 4689 | 93.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.6151856390902335 | 51 | 1.0% |
|
| 0.6418538861723947 | 6 | 0.1% |
|
| 0.6678293725756554 | 8 | 0.2% |
|
| 0.6931471805599453 | 5 | 0.1% |
|
| 0.7178397931503168 | 13 | 0.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 4.1666652238017265 | 1 | 0.0% |
|
| 4.167440117292651 | 1 | 0.0% |
|
| 4.1751562049585145 | 1 | 0.0% |
|
| 4.177459468932607 | 1 | 0.0% |
|
| 4.1774747946061055 | 50 | 1.0% |
|
lnlongten
Highly correlated
This variable is highly correlated with lnlongmon and should be ignored for analysis
| Correlation | 0.92481 |
|---|
lnothdebt
Highly correlated
This variable is highly correlated with othdebt and should be ignored for analysis
| Correlation | 0.90163 |
|---|
lntollmon
Highly correlated
This variable is highly correlated with tollmon and should be ignored for analysis
| Correlation | 0.93703 |
|---|
lntollten
Highly correlated
This variable is highly correlated with lntollmon and should be ignored for analysis
| Correlation | 0.98558 |
|---|
lnwiremon
Highly correlated
This variable is highly correlated with wiremon and should be ignored for analysis
| Correlation | 0.95325 |
|---|
lnwireten
Highly correlated
This variable is highly correlated with lnwiremon and should be ignored for analysis
| Correlation | 0.98726 |
|---|
longmon
Numeric
| Distinct count | 800 |
|---|---|
| Unique (%) | 16.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 13.273 |
|---|---|
| Minimum | 1.85 |
| Maximum | 65.201 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.85 |
|---|---|
| 5-th percentile | 2.9 |
| Q1 | 5.7 |
| Median | 9.55 |
| Q3 | 16.55 |
| 95-th percentile | 36.758 |
| Maximum | 65.201 |
| Range | 63.351 |
| Interquartile range | 10.85 |
Descriptive statistics
| Standard deviation | 11.552 |
|---|---|
| Coef of variation | 0.87034 |
| Kurtosis | 5.5676 |
| Mean | 13.273 |
| MAD | 8.1347 |
| Skewness | 2.1713 |
| Sum | 66363 |
| Variance | 133.44 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.85 | 51 | 1.0% |
|
| 65.20100000000004 | 50 | 1.0% |
|
| 4.2 | 31 | 0.6% |
|
| 5.0 | 29 | 0.6% |
|
| 7.9 | 28 | 0.6% |
|
| 5.65 | 25 | 0.5% |
|
| 7.5 | 25 | 0.5% |
|
| 4.95 | 24 | 0.5% |
|
| 5.15 | 24 | 0.5% |
|
| 3.95 | 24 | 0.5% |
|
| Other values (790) | 4689 | 93.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.85 | 51 | 1.0% |
|
| 1.9 | 6 | 0.1% |
|
| 1.95 | 8 | 0.2% |
|
| 2.0 | 5 | 0.1% |
|
| 2.05 | 13 | 0.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 64.5 | 1 | 0.0% |
|
| 64.55 | 1 | 0.0% |
|
| 65.05 | 1 | 0.0% |
|
| 65.2 | 1 | 0.0% |
|
| 65.20100000000004 | 50 | 1.0% |
|
longten
Highly correlated
This variable is highly correlated with longmon and should be ignored for analysis
| Correlation | 0.98281 |
|---|
marital
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4802 |
|---|
| 0.0 |
2599
|
|---|---|
| 1.0 |
2401
|
| Value | Count | Frequency (%) | |
| 0.0 | 2599 | 52.0% |
|
| 1.0 | 2401 | 48.0% |
|
multline
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4884 |
|---|
| 0.0 |
2558
|
|---|---|
| 1.0 |
2442
|
| Value | Count | Frequency (%) | |
| 0.0 | 2558 | 51.2% |
|
| 1.0 | 2442 | 48.8% |
|
news
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4726 |
|---|
| 0.0 |
2637
|
|---|---|
| 1.0 |
2363
|
| Value | Count | Frequency (%) | |
| 0.0 | 2637 | 52.7% |
|
| 1.0 | 2363 | 47.3% |
|
othdebt
Numeric
| Distinct count | 4875 |
|---|---|
| Unique (%) | 97.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.5225 |
|---|---|
| Minimum | 0.1143 |
| Maximum | 24.064 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.1143 |
|---|---|
| 5-th percentile | 0.28769 |
| Q1 | 0.9803 |
| Median | 2.0985 |
| Q3 | 4.3148 |
| 95-th percentile | 11.816 |
| Maximum | 24.064 |
| Range | 23.95 |
| Interquartile range | 3.3345 |
Descriptive statistics
| Standard deviation | 4.2218 |
|---|---|
| Coef of variation | 1.1985 |
| Kurtosis | 8.4039 |
| Mean | 3.5225 |
| MAD | 2.7888 |
| Skewness | 2.686 |
| Sum | 17613 |
| Variance | 17.823 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 24.064260000000036 | 50 | 1.0% |
|
| 0.11429903999999999 | 50 | 1.0% |
|
| 1.112832 | 3 | 0.1% |
|
| 0.9729720000000001 | 2 | 0.0% |
|
| 0.355368 | 2 | 0.0% |
|
| 0.531696 | 2 | 0.0% |
|
| 0.18144 | 2 | 0.0% |
|
| 1.131624 | 2 | 0.0% |
|
| 1.84548 | 2 | 0.0% |
|
| 2.9952 | 2 | 0.0% |
|
| Other values (4865) | 4883 | 97.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.11429903999999999 | 50 | 1.0% |
|
| 0.114312 | 1 | 0.0% |
|
| 0.11438000000000004 | 1 | 0.0% |
|
| 0.11668800000000001 | 1 | 0.0% |
|
| 0.117936 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 23.841252 | 1 | 0.0% |
|
| 23.892297 | 1 | 0.0% |
|
| 23.95712 | 1 | 0.0% |
|
| 24.062447999999996 | 1 | 0.0% |
|
| 24.064260000000036 | 50 | 1.0% |
|
owncd
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.9328 |
|---|
| 1.0 |
4664
|
|---|---|
| 0.0 |
|
| Value | Count | Frequency (%) | |
| 1.0 | 4664 | 93.3% |
|
| 0.0 | 336 | 6.7% |
|
owndvd
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.9136 |
|---|
| 1.0 |
4568
|
|---|---|
| 0.0 |
|
| Value | Count | Frequency (%) | |
| 1.0 | 4568 | 91.4% |
|
| 0.0 | 432 | 8.6% |
|
ownfax
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1788 |
|---|
| 0.0 |
4106
|
|---|---|
| 1.0 |
894
|
| Value | Count | Frequency (%) | |
| 0.0 | 4106 | 82.1% |
|
| 1.0 | 894 | 17.9% |
|
owngame
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4748 |
|---|
| 0.0 |
2626
|
|---|---|
| 1.0 |
2374
|
| Value | Count | Frequency (%) | |
| 0.0 | 2626 | 52.5% |
|
| 1.0 | 2374 | 47.5% |
|
ownipod
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4792 |
|---|
| 0.0 |
2604
|
|---|---|
| 1.0 |
2396
|
| Value | Count | Frequency (%) | |
| 0.0 | 2604 | 52.1% |
|
| 1.0 | 2396 | 47.9% |
|
ownpc
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.6328 |
|---|
| 1.0 |
3164
|
|---|---|
| 0.0 |
1836
|
| Value | Count | Frequency (%) | |
| 1.0 | 3164 | 63.3% |
|
| 0.0 | 1836 | 36.7% |
|
ownpda
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.201 |
|---|
| 0.0 |
3995
|
|---|---|
| 1.0 |
1005
|
| Value | Count | Frequency (%) | |
| 0.0 | 3995 | 79.9% |
|
| 1.0 | 1005 | 20.1% |
|
owntv
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.983 |
|---|
| 1.0 |
4915
|
|---|---|
| 0.0 |
|
| Value | Count | Frequency (%) | |
| 1.0 | 4915 | 98.3% |
|
| 0.0 | 85 | 1.7% |
|
ownvcr
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.9156 |
|---|
| 1.0 |
4578
|
|---|---|
| 0.0 |
|
| Value | Count | Frequency (%) | |
| 1.0 | 4578 | 91.6% |
|
| 0.0 | 422 | 8.4% |
|
pager
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2436 |
|---|
| 0.0 |
3782
|
|---|---|
| 1.0 |
1218
|
| Value | Count | Frequency (%) | |
| 0.0 | 3782 | 75.6% |
|
| 1.0 | 1218 | 24.4% |
|
pets
Numeric
| Distinct count | 14 |
|---|---|
| Unique (%) | 0.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.0492 |
|---|---|
| Minimum | 0 |
| Maximum | 13 |
| Zeros (%) | 30.6% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 2 |
| Q3 | 5 |
| 95-th percentile | 10 |
| Maximum | 13 |
| Range | 13 |
| Interquartile range | 5 |
Descriptive statistics
| Standard deviation | 3.3512 |
|---|---|
| Coef of variation | 1.099 |
| Kurtosis | 0.22716 |
| Mean | 3.0492 |
| MAD | 2.7576 |
| Skewness | 1.0747 |
| Sum | 15246 |
| Variance | 11.231 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 1529 | 30.6% |
|
| 1.0 | 780 | 15.6% |
|
| 2.0 | 586 | 11.7% |
|
| 3.0 | 376 | 7.5% |
|
| 5.0 | 298 | 6.0% |
|
| 4.0 | 284 | 5.7% |
|
| 6.0 | 256 | 5.1% |
|
| 7.0 | 246 | 4.9% |
|
| 8.0 | 178 | 3.6% |
|
| 9.0 | 170 | 3.4% |
|
| Other values (4) | 297 | 5.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 1529 | 30.6% |
|
| 1.0 | 780 | 15.6% |
|
| 2.0 | 586 | 11.7% |
|
| 3.0 | 376 | 7.5% |
|
| 4.0 | 284 | 5.7% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 9.0 | 170 | 3.4% |
|
| 10.0 | 115 | 2.3% |
|
| 11.0 | 69 | 1.4% |
|
| 12.0 | 50 | 1.0% |
|
| 13.0 | 63 | 1.3% |
|
pets_birds
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.106 |
|---|---|
| Minimum | 0 |
| Maximum | 3 |
| Zeros (%) | 94.0% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 1 |
| Maximum | 3 |
| Range | 3 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.46261 |
|---|---|
| Coef of variation | 4.3642 |
| Kurtosis | 23.827 |
| Mean | 0.106 |
| MAD | 0.1992 |
| Skewness | 4.8348 |
| Sum | 530 |
| Variance | 0.21401 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 4698 | 94.0% |
|
| 1.0 | 144 | 2.9% |
|
| 2.0 | 88 | 1.8% |
|
| 3.0 | 70 | 1.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4698 | 94.0% |
|
| 1.0 | 144 | 2.9% |
|
| 2.0 | 88 | 1.8% |
|
| 3.0 | 70 | 1.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4698 | 94.0% |
|
| 1.0 | 144 | 2.9% |
|
| 2.0 | 88 | 1.8% |
|
| 3.0 | 70 | 1.4% |
|
pets_cats
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.4904 |
|---|---|
| Minimum | 0 |
| Maximum | 3 |
| Zeros (%) | 68.3% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 1 |
| 95-th percentile | 2 |
| Maximum | 3 |
| Range | 3 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.82246 |
|---|---|
| Coef of variation | 1.6771 |
| Kurtosis | 1.654 |
| Mean | 0.4904 |
| MAD | 0.66949 |
| Skewness | 1.6148 |
| Sum | 2452 |
| Variance | 0.67644 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3413 | 68.3% |
|
| 1.0 | 923 | 18.5% |
|
| 2.0 | 463 | 9.3% |
|
| 3.0 | 201 | 4.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3413 | 68.3% |
|
| 1.0 | 923 | 18.5% |
|
| 2.0 | 463 | 9.3% |
|
| 3.0 | 201 | 4.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3413 | 68.3% |
|
| 1.0 | 923 | 18.5% |
|
| 2.0 | 463 | 9.3% |
|
| 3.0 | 201 | 4.0% |
|
pets_dogs
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.3828 |
|---|---|
| Minimum | 0 |
| Maximum | 3 |
| Zeros (%) | 75.2% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 2 |
| Maximum | 3 |
| Range | 3 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.75497 |
|---|---|
| Coef of variation | 1.9722 |
| Kurtosis | 3.2005 |
| Mean | 0.3828 |
| MAD | 0.57604 |
| Skewness | 2.0059 |
| Sum | 1914 |
| Variance | 0.56998 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3762 | 75.2% |
|
| 1.0 | 720 | 14.4% |
|
| 2.0 | 360 | 7.2% |
|
| 3.0 | 158 | 3.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3762 | 75.2% |
|
| 1.0 | 720 | 14.4% |
|
| 2.0 | 360 | 7.2% |
|
| 3.0 | 158 | 3.2% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3762 | 75.2% |
|
| 1.0 | 720 | 14.4% |
|
| 2.0 | 360 | 7.2% |
|
| 3.0 | 158 | 3.2% |
|
pets_freshfish
Numeric
| Distinct count | 12 |
|---|---|
| Unique (%) | 0.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.8348 |
|---|---|
| Minimum | 0 |
| Maximum | 11 |
| Zeros (%) | 69.2% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 4 |
| 95-th percentile | 8 |
| Maximum | 11 |
| Range | 11 |
| Interquartile range | 4 |
Descriptive statistics
| Standard deviation | 3.0313 |
|---|---|
| Coef of variation | 1.6521 |
| Kurtosis | 0.56932 |
| Mean | 1.8348 |
| MAD | 2.5465 |
| Skewness | 1.3786 |
| Sum | 9174 |
| Variance | 9.1885 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3462 | 69.2% |
|
| 5.0 | 261 | 5.2% |
|
| 6.0 | 251 | 5.0% |
|
| 7.0 | 229 | 4.6% |
|
| 4.0 | 222 | 4.4% |
|
| 8.0 | 134 | 2.7% |
|
| 3.0 | 130 | 2.6% |
|
| 9.0 | 110 | 2.2% |
|
| 11.0 | 67 | 1.3% |
|
| 2.0 | 63 | 1.3% |
|
| Other values (2) | 71 | 1.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3462 | 69.2% |
|
| 1.0 | 17 | 0.3% |
|
| 2.0 | 63 | 1.3% |
|
| 3.0 | 130 | 2.6% |
|
| 4.0 | 222 | 4.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 7.0 | 229 | 4.6% |
|
| 8.0 | 134 | 2.7% |
|
| 9.0 | 110 | 2.2% |
|
| 10.0 | 54 | 1.1% |
|
| 11.0 | 67 | 1.3% |
|
pets_reptiles
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.05 |
|---|---|
| Minimum | 0 |
| Maximum | 2 |
| Zeros (%) | 96.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 0 |
| Maximum | 2 |
| Range | 2 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.27334 |
|---|---|
| Coef of variation | 5.4668 |
| Kurtosis | 35.606 |
| Mean | 0.05 |
| MAD | 0.09636 |
| Skewness | 5.8926 |
| Sum | 250 |
| Variance | 0.074715 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 4818 | 96.4% |
|
| 1.0 | 114 | 2.3% |
|
| 2.0 | 68 | 1.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4818 | 96.4% |
|
| 1.0 | 114 | 2.3% |
|
| 2.0 | 68 | 1.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4818 | 96.4% |
|
| 1.0 | 114 | 2.3% |
|
| 2.0 | 68 | 1.4% |
|
pets_saltfish
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.0226 |
|---|---|
| Minimum | 0 |
| Maximum | 2 |
| Zeros (%) | 98.8% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 0 |
| Maximum | 2 |
| Range | 2 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.21 |
|---|---|
| Coef of variation | 9.2918 |
| Kurtosis | 83.885 |
| Mean | 0.0226 |
| MAD | 0.044676 |
| Skewness | 9.2491 |
| Sum | 113 |
| Variance | 0.044098 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 4942 | 98.8% |
|
| 2.0 | 55 | 1.1% |
|
| 1.0 | 3 | 0.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4942 | 98.8% |
|
| 1.0 | 3 | 0.1% |
|
| 2.0 | 55 | 1.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4942 | 98.8% |
|
| 1.0 | 3 | 0.1% |
|
| 2.0 | 55 | 1.1% |
|
pets_small
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.1028 |
|---|---|
| Minimum | 0 |
| Maximum | 3 |
| Zeros (%) | 95.0% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 1 |
| Maximum | 3 |
| Range | 3 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.4832 |
|---|---|
| Coef of variation | 4.7004 |
| Kurtosis | 24.434 |
| Mean | 0.1028 |
| MAD | 0.19528 |
| Skewness | 4.9785 |
| Sum | 514 |
| Variance | 0.23348 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 4749 | 95.0% |
|
| 3.0 | 90 | 1.8% |
|
| 2.0 | 83 | 1.7% |
|
| 1.0 | 78 | 1.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4749 | 95.0% |
|
| 1.0 | 78 | 1.6% |
|
| 2.0 | 83 | 1.7% |
|
| 3.0 | 90 | 1.8% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 4749 | 95.0% |
|
| 1.0 | 78 | 1.6% |
|
| 2.0 | 83 | 1.7% |
|
| 3.0 | 90 | 1.8% |
|
polcontrib
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2384 |
|---|
| 0.0 |
3808
|
|---|---|
| 1.0 |
1192
|
| Value | Count | Frequency (%) | |
| 0.0 | 3808 | 76.2% |
|
| 1.0 | 1192 | 23.8% |
|
polparty
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.3814 |
|---|
| 0.0 |
3093
|
|---|---|
| 1.0 |
1907
|
| Value | Count | Frequency (%) | |
| 0.0 | 3093 | 61.9% |
|
| 1.0 | 1907 | 38.1% |
|
polview
Numeric
| Distinct count | 7 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.0886 |
|---|---|
| Minimum | 1 |
| Maximum | 7 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 2 |
| Q1 | 3 |
| Median | 4 |
| Q3 | 5 |
| 95-th percentile | 6 |
| Maximum | 7 |
| Range | 6 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.3871 |
|---|---|
| Coef of variation | 0.33925 |
| Kurtosis | -0.5312 |
| Mean | 4.0886 |
| MAD | 1.0702 |
| Skewness | -0.19834 |
| Sum | 20443 |
| Variance | 1.9239 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 4.0 | 1733 | 34.7% |
|
| 5.0 | 893 | 17.9% |
|
| 6.0 | 843 | 16.9% |
|
| 3.0 | 659 | 13.2% |
|
| 2.0 | 623 | 12.5% |
|
| 1.0 | 163 | 3.3% |
|
| 7.0 | 86 | 1.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 163 | 3.3% |
|
| 2.0 | 623 | 12.5% |
|
| 3.0 | 659 | 13.2% |
|
| 4.0 | 1733 | 34.7% |
|
| 5.0 | 893 | 17.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3.0 | 659 | 13.2% |
|
| 4.0 | 1733 | 34.7% |
|
| 5.0 | 893 | 17.9% |
|
| 6.0 | 843 | 16.9% |
|
| 7.0 | 86 | 1.7% |
|
reason
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 7.6368 |
|---|---|
| Minimum | 1 |
| Maximum | 9 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 9 |
| Median | 9 |
| Q3 | 9 |
| 95-th percentile | 9 |
| Maximum | 9 |
| Range | 8 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 2.85 |
|---|---|
| Coef of variation | 0.37319 |
| Kurtosis | 0.84805 |
| Mean | 7.6368 |
| MAD | 2.2095 |
| Skewness | -1.6586 |
| Sum | 38184 |
| Variance | 8.1225 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 9.0 | 4052 | 81.0% |
|
| 1.0 | 447 | 8.9% |
|
| 2.0 | 339 | 6.8% |
|
| 4.0 | 105 | 2.1% |
|
| 3.0 | 57 | 1.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 447 | 8.9% |
|
| 2.0 | 339 | 6.8% |
|
| 3.0 | 57 | 1.1% |
|
| 4.0 | 105 | 2.1% |
|
| 9.0 | 4052 | 81.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 447 | 8.9% |
|
| 2.0 | 339 | 6.8% |
|
| 3.0 | 57 | 1.1% |
|
| 4.0 | 105 | 2.1% |
|
| 9.0 | 4052 | 81.0% |
|
region
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.0014 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 2 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 5 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.4218 |
|---|---|
| Coef of variation | 0.4737 |
| Kurtosis | -1.309 |
| Mean | 3.0014 |
| MAD | 1.2069 |
| Skewness | 0.0050525 |
| Sum | 15007 |
| Variance | 2.0214 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 5.0 | 1027 | 20.5% |
|
| 1.0 | 1009 | 20.2% |
|
| 3.0 | 1003 | 20.1% |
|
| 2.0 | 995 | 19.9% |
|
| 4.0 | 966 | 19.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1009 | 20.2% |
|
| 2.0 | 995 | 19.9% |
|
| 3.0 | 1003 | 20.1% |
|
| 4.0 | 966 | 19.3% |
|
| 5.0 | 1027 | 20.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1009 | 20.2% |
|
| 2.0 | 995 | 19.9% |
|
| 3.0 | 1003 | 20.1% |
|
| 4.0 | 966 | 19.3% |
|
| 5.0 | 1027 | 20.5% |
|
reside
Numeric
| Distinct count | 6 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.1942 |
|---|---|
| Minimum | 1 |
| Maximum | 6 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 2 |
| Q3 | 3 |
| 95-th percentile | 5 |
| Maximum | 6 |
| Range | 5 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.3615 |
|---|---|
| Coef of variation | 0.6205 |
| Kurtosis | 0.29171 |
| Mean | 2.1942 |
| MAD | 1.086 |
| Skewness | 1.0938 |
| Sum | 10971 |
| Variance | 1.8537 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 2035 | 40.7% |
|
| 2.0 | 1467 | 29.3% |
|
| 3.0 | 552 | 11.0% |
|
| 4.0 | 521 | 10.4% |
|
| 5.0 | 288 | 5.8% |
|
| 6.0 | 137 | 2.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 2035 | 40.7% |
|
| 2.0 | 1467 | 29.3% |
|
| 3.0 | 552 | 11.0% |
|
| 4.0 | 521 | 10.4% |
|
| 5.0 | 288 | 5.8% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 1467 | 29.3% |
|
| 3.0 | 552 | 11.0% |
|
| 4.0 | 521 | 10.4% |
|
| 5.0 | 288 | 5.8% |
|
| 6.0 | 137 | 2.7% |
|
response_01
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.0836 |
|---|
| 0.0 |
4582
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4582 | 91.6% |
|
| 1.0 | 418 | 8.4% |
|
response_02
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1298 |
|---|
| 0.0 |
4351
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4351 | 87.0% |
|
| 1.0 | 649 | 13.0% |
|
response_03
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1026 |
|---|
| 0.0 |
4487
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4487 | 89.7% |
|
| 1.0 | 513 | 10.3% |
|
retire
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1476 |
|---|
| 0.0 |
4262
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4262 | 85.2% |
|
| 1.0 | 738 | 14.8% |
|
spoused
Highly correlated
This variable is highly correlated with marital and should be ignored for analysis
| Correlation | 0.95763 |
|---|
spousedcat
Highly correlated
This variable is highly correlated with spoused and should be ignored for analysis
| Correlation | 0.98315 |
|---|
telecommute
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.188 |
|---|
| 0.0 |
4060
|
|---|---|
| 1.0 |
940
|
| Value | Count | Frequency (%) | |
| 0.0 | 4060 | 81.2% |
|
| 1.0 | 940 | 18.8% |
|
tenure
Numeric
| Distinct count | 72 |
|---|---|
| Unique (%) | 1.4% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 38.205 |
|---|---|
| Minimum | 1 |
| Maximum | 72 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 4 |
| Q1 | 18 |
| Median | 38 |
| Q3 | 59 |
| 95-th percentile | 72 |
| Maximum | 72 |
| Range | 71 |
| Interquartile range | 41 |
Descriptive statistics
| Standard deviation | 22.661 |
|---|---|
| Coef of variation | 0.59313 |
| Kurtosis | -1.3307 |
| Mean | 38.205 |
| MAD | 19.914 |
| Skewness | -0.03621 |
| Sum | 191030 |
| Variance | 513.52 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 72.0 | 251 | 5.0% |
|
| 71.0 | 147 | 2.9% |
|
| 70.0 | 98 | 2.0% |
|
| 69.0 | 94 | 1.9% |
|
| 5.0 | 94 | 1.9% |
|
| 9.0 | 88 | 1.8% |
|
| 7.0 | 87 | 1.7% |
|
| 2.0 | 81 | 1.6% |
|
| 64.0 | 79 | 1.6% |
|
| 11.0 | 79 | 1.6% |
|
| Other values (62) | 3902 | 78.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 75 | 1.5% |
|
| 2.0 | 81 | 1.6% |
|
| 3.0 | 75 | 1.5% |
|
| 4.0 | 71 | 1.4% |
|
| 5.0 | 94 | 1.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 68.0 | 79 | 1.6% |
|
| 69.0 | 94 | 1.9% |
|
| 70.0 | 98 | 2.0% |
|
| 71.0 | 147 | 2.9% |
|
| 72.0 | 251 | 5.0% |
|
tollfree
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.4756 |
|---|
| 0.0 |
2622
|
|---|---|
| 1.0 |
2378
|
| Value | Count | Frequency (%) | |
| 0.0 | 2622 | 52.4% |
|
| 1.0 | 2378 | 47.6% |
|
tollmon
Numeric
| Distinct count | 197 |
|---|---|
| Unique (%) | 3.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 13.14 |
|---|---|
| Minimum | 0 |
| Maximum | 58.753 |
| Zeros (%) | 52.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 24.5 |
| 95-th percentile | 43.5 |
| Maximum | 58.753 |
| Range | 58.753 |
| Interquartile range | 24.5 |
Descriptive statistics
| Standard deviation | 15.811 |
|---|---|
| Coef of variation | 1.2033 |
| Kurtosis | -0.24916 |
| Mean | 13.14 |
| MAD | 13.856 |
| Skewness | 0.87308 |
| Sum | 65700 |
| Variance | 250 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 2622 | 52.4% |
|
| 58.752500000000055 | 50 | 1.0% |
|
| 22.75 | 33 | 0.7% |
|
| 18.0 | 33 | 0.7% |
|
| 24.0 | 32 | 0.6% |
|
| 23.0 | 31 | 0.6% |
|
| 23.75 | 30 | 0.6% |
|
| 22.0 | 30 | 0.6% |
|
| 20.0 | 29 | 0.6% |
|
| 19.0 | 29 | 0.6% |
|
| Other values (187) | 2081 | 41.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 2622 | 52.4% |
|
| 8.0 | 1 | 0.0% |
|
| 8.5 | 2 | 0.0% |
|
| 8.75 | 2 | 0.0% |
|
| 9.0 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 57.75 | 4 | 0.1% |
|
| 58.0 | 3 | 0.1% |
|
| 58.25 | 1 | 0.0% |
|
| 58.75 | 3 | 0.1% |
|
| 58.752500000000055 | 50 | 1.0% |
|
tollten
Numeric
| Distinct count | 2274 |
|---|---|
| Unique (%) | 45.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 570.13 |
|---|---|
| Minimum | 0 |
| Maximum | 3977.3 |
| Zeros (%) | 52.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 885.45 |
| 95-th percentile | 2620.2 |
| Maximum | 3977.3 |
| Range | 3977.3 |
| Interquartile range | 885.45 |
Descriptive statistics
| Standard deviation | 914.74 |
|---|---|
| Coef of variation | 1.6044 |
| Kurtosis | 2.7641 |
| Mean | 570.13 |
| MAD | 701.39 |
| Skewness | 1.824 |
| Sum | 2850700 |
| Variance | 836760 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 2622 | 52.4% |
|
| 3977.270500000003 | 50 | 1.0% |
|
| 10.0 | 3 | 0.1% |
|
| 16.75 | 3 | 0.1% |
|
| 1480.5 | 3 | 0.1% |
|
| 763.75 | 2 | 0.0% |
|
| 1727.75 | 2 | 0.0% |
|
| 349.75 | 2 | 0.0% |
|
| 1031.4 | 2 | 0.0% |
|
| 732.3 | 2 | 0.0% |
|
| Other values (2264) | 2309 | 46.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 2622 | 52.4% |
|
| 8.75 | 1 | 0.0% |
|
| 10.0 | 3 | 0.1% |
|
| 10.5 | 1 | 0.0% |
|
| 10.75 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3949.45 | 1 | 0.0% |
|
| 3967.3 | 1 | 0.0% |
|
| 3973.75 | 1 | 0.0% |
|
| 3977.15 | 1 | 0.0% |
|
| 3977.270500000003 | 50 | 1.0% |
|
total_benefit
Numeric
| Distinct count | 7 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 5.0398 |
|---|---|
| Minimum | 2 |
| Maximum | 8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2 |
|---|---|
| 5-th percentile | 2 |
| Q1 | 4 |
| Median | 5 |
| Q3 | 6 |
| 95-th percentile | 8 |
| Maximum | 8 |
| Range | 6 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.5687 |
|---|---|
| Coef of variation | 0.31126 |
| Kurtosis | -0.64886 |
| Mean | 5.0398 |
| MAD | 1.2474 |
| Skewness | -0.0079873 |
| Sum | 25199 |
| Variance | 2.4607 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 5.0 | 1251 | 25.0% |
|
| 6.0 | 978 | 19.6% |
|
| 4.0 | 946 | 18.9% |
|
| 7.0 | 615 | 12.3% |
|
| 3.0 | 593 | 11.9% |
|
| 8.0 | 329 | 6.6% |
|
| 2.0 | 288 | 5.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 288 | 5.8% |
|
| 3.0 | 593 | 11.9% |
|
| 4.0 | 946 | 18.9% |
|
| 5.0 | 1251 | 25.0% |
|
| 6.0 | 978 | 19.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 4.0 | 946 | 18.9% |
|
| 5.0 | 1251 | 25.0% |
|
| 6.0 | 978 | 19.6% |
|
| 7.0 | 615 | 12.3% |
|
| 8.0 | 329 | 6.6% |
|
total_fee
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.377 |
|---|---|
| Minimum | 0 |
| Maximum | 2 |
| Zeros (%) | 65.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 1 |
| 95-th percentile | 1 |
| Maximum | 2 |
| Range | 2 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.55365 |
|---|---|
| Coef of variation | 1.4686 |
| Kurtosis | 0.27771 |
| Mean | 0.377 |
| MAD | 0.49674 |
| Skewness | 1.1296 |
| Sum | 1885 |
| Variance | 0.30653 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3294 | 65.9% |
|
| 1.0 | 1527 | 30.5% |
|
| 2.0 | 179 | 3.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3294 | 65.9% |
|
| 1.0 | 1527 | 30.5% |
|
| 2.0 | 179 | 3.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3294 | 65.9% |
|
| 1.0 | 1527 | 30.5% |
|
| 2.0 | 179 | 3.6% |
|
total_items
Numeric
| Distinct count | 29 |
|---|---|
| Unique (%) | 0.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 14.832 |
|---|---|
| Minimum | 2 |
| Maximum | 30 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2 |
|---|---|
| 5-th percentile | 8 |
| Q1 | 12 |
| Median | 15 |
| Q3 | 18 |
| 95-th percentile | 22 |
| Maximum | 30 |
| Range | 28 |
| Interquartile range | 6 |
Descriptive statistics
| Standard deviation | 4.3428 |
|---|---|
| Coef of variation | 0.2928 |
| Kurtosis | 0.06111 |
| Mean | 14.832 |
| MAD | 3.4409 |
| Skewness | 0.041318 |
| Sum | 74160 |
| Variance | 18.86 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 16.0 | 473 | 9.5% |
|
| 15.0 | 470 | 9.4% |
|
| 14.0 | 456 | 9.1% |
|
| 13.0 | 411 | 8.2% |
|
| 12.0 | 391 | 7.8% |
|
| 17.0 | 384 | 7.7% |
|
| 18.0 | 348 | 7.0% |
|
| 11.0 | 330 | 6.6% |
|
| 19.0 | 278 | 5.6% |
|
| 10.0 | 246 | 4.9% |
|
| Other values (19) | 1213 | 24.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 10 | 0.2% |
|
| 3.0 | 19 | 0.4% |
|
| 4.0 | 30 | 0.6% |
|
| 5.0 | 33 | 0.7% |
|
| 6.0 | 43 | 0.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 26.0 | 24 | 0.5% |
|
| 27.0 | 11 | 0.2% |
|
| 28.0 | 2 | 0.0% |
|
| 29.0 | 3 | 0.1% |
|
| 30.0 | 2 | 0.0% |
|
total_spent
Numeric
| Distinct count | 4840 |
|---|---|
| Unique (%) | 96.8% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 6.0036 |
|---|---|
| Minimum | 3.7092 |
| Maximum | 7.5642 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 3.7092 |
|---|---|
| 5-th percentile | 4.8963 |
| Q1 | 5.6214 |
| Median | 6.0265 |
| Q3 | 6.4225 |
| 95-th percentile | 7.0407 |
| Maximum | 7.5642 |
| Range | 3.855 |
| Interquartile range | 0.80111 |
Descriptive statistics
| Standard deviation | 0.64474 |
|---|---|
| Coef of variation | 0.10739 |
| Kurtosis | 0.45687 |
| Mean | 6.0036 |
| MAD | 0.50167 |
| Skewness | -0.33839 |
| Sum | 30018 |
| Variance | 0.41569 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 3.709159908409081 | 12 | 0.2% |
|
| 7.56420579828806 | 11 | 0.2% |
|
| 5.926819353614451 | 3 | 0.1% |
|
| 6.239027502962935 | 3 | 0.1% |
|
| 6.070021536263528 | 3 | 0.1% |
|
| 5.252221070655064 | 3 | 0.1% |
|
| 6.413081836886114 | 3 | 0.1% |
|
| 6.197624693381778 | 2 | 0.0% |
|
| 5.607748699786767 | 2 | 0.0% |
|
| 6.186949090428217 | 2 | 0.0% |
|
| Other values (4830) | 4956 | 99.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 3.709159908409081 | 12 | 0.2% |
|
| 3.8375149532530846 | 1 | 0.0% |
|
| 3.895883460844997 | 1 | 0.0% |
|
| 3.9154172384961616 | 1 | 0.0% |
|
| 3.9189986191645714 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 7.513702264727489 | 1 | 0.0% |
|
| 7.518205290233824 | 1 | 0.0% |
|
| 7.518703411971194 | 1 | 0.0% |
|
| 7.563283558802034 | 1 | 0.0% |
|
| 7.56420579828806 | 11 | 0.2% |
|
total_tenure
Highly correlated
This variable is highly correlated with tenure and should be ignored for analysis
| Correlation | 0.92561 |
|---|
townsize
Numeric
| Distinct count | 5 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.6874 |
|---|---|
| Minimum | 1 |
| Maximum | 5 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 3 |
| Q3 | 4 |
| 95-th percentile | 5 |
| Maximum | 5 |
| Range | 4 |
| Interquartile range | 3 |
Descriptive statistics
| Standard deviation | 1.4262 |
|---|---|
| Coef of variation | 0.5307 |
| Kurtosis | -1.2632 |
| Mean | 2.6874 |
| MAD | 1.2581 |
| Skewness | 0.27659 |
| Sum | 13437 |
| Variance | 2.0341 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 1437 | 28.7% |
|
| 2.0 | 1048 | 21.0% |
|
| 3.0 | 907 | 18.1% |
|
| 4.0 | 857 | 17.1% |
|
| 5.0 | 751 | 15.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1437 | 28.7% |
|
| 2.0 | 1048 | 21.0% |
|
| 3.0 | 907 | 18.1% |
|
| 4.0 | 857 | 17.1% |
|
| 5.0 | 751 | 15.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.0 | 1437 | 28.7% |
|
| 2.0 | 1048 | 21.0% |
|
| 3.0 | 907 | 18.1% |
|
| 4.0 | 857 | 17.1% |
|
| 5.0 | 751 | 15.0% |
|
union
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1512 |
|---|
| 0.0 |
4244
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 4244 | 84.9% |
|
| 1.0 | 756 | 15.1% |
|
voice
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.303 |
|---|
| 0.0 |
3485
|
|---|---|
| 1.0 |
1515
|
| Value | Count | Frequency (%) | |
| 0.0 | 3485 | 69.7% |
|
| 1.0 | 1515 | 30.3% |
|
vote
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.518 |
|---|
| 1.0 |
2590
|
|---|---|
| 0.0 |
2410
|
| Value | Count | Frequency (%) | |
| 1.0 | 2590 | 51.8% |
|
| 0.0 | 2410 | 48.2% |
|
wireless
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.2688 |
|---|
| 0.0 |
3656
|
|---|---|
| 1.0 |
1344
|
| Value | Count | Frequency (%) | |
| 0.0 | 3656 | 73.1% |
|
| 1.0 | 1344 | 26.9% |
|
wiremon
Highly correlated
This variable is highly correlated with wireless and should be ignored for analysis
| Correlation | 0.91316 |
|---|
wireten
Numeric
| Distinct count | 1279 |
|---|---|
| Unique (%) | 25.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 409.96 |
|---|---|
| Minimum | 0 |
| Maximum | 4530.2 |
| Zeros (%) | 73.1% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 89.962 |
| 95-th percentile | 2687.9 |
| Maximum | 4530.2 |
| Range | 4530.2 |
| Interquartile range | 89.962 |
Descriptive statistics
| Standard deviation | 930.01 |
|---|---|
| Coef of variation | 2.2685 |
| Kurtosis | 6.1654 |
| Mean | 409.96 |
| MAD | 627.77 |
| Skewness | 2.5701 |
| Sum | 2049800 |
| Variance | 864910 |
| Memory size | 39.1 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 3656 | 73.1% |
|
| 4530.186000000002 | 50 | 1.0% |
|
| 2182.05 | 2 | 0.0% |
|
| 2386.25 | 2 | 0.0% |
|
| 2323.8 | 2 | 0.0% |
|
| 1062.75 | 2 | 0.0% |
|
| 2049.85 | 2 | 0.0% |
|
| 1199.2 | 2 | 0.0% |
|
| 20.9 | 2 | 0.0% |
|
| 183.1 | 2 | 0.0% |
|
| Other values (1269) | 1278 | 25.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3656 | 73.1% |
|
| 12.7 | 1 | 0.0% |
|
| 14.55 | 1 | 0.0% |
|
| 14.6 | 1 | 0.0% |
|
| 14.9 | 2 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 4494.9 | 1 | 0.0% |
|
| 4506.6 | 1 | 0.0% |
|
| 4516.4 | 1 | 0.0% |
|
| 4530.1 | 1 | 0.0% |
|
| 4530.186000000002 | 50 | 1.0% |
|
| region | townsize | gender | age | agecat | ed | edcat | jobcat | union | employ | empcat | retire | income | lninc | inccat | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | default | jobsat | marital | spoused | spousedcat | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | homeown | hometype | address | addresscat | cars | carown | cartype | carvalue | carcatvalue | carbought | carbuy | commute | commutecat | commutetime | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardtenurecat | card2 | card2type | card2tenurecat | active | bfast | tenure | churn | longmon | lnlongmon | longten | lnlongten | tollfree | tollmon | lntollmon | tollten | lntollten | equip | equipmon | lnequipmon | equipten | lnequipten | callcard | cardmon | lncardmon | cardten | lncardten | wireless | wiremon | lnwiremon | wireten | lnwireten | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | hourstv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | total_spent | total_benefit | total_fee | total_tenure | total_items | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 2.0 | 1.0 | 20.0 | 2.0 | 15.0 | 3.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 31.0 | 3.433987 | 2.0 | 11.1 | 1.200909 | 0.788870 | 2.240091 | 1.175601 | 1.0 | 1.0 | 0.0 | -1.0 | -1.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 1.0 | 2.0 | 1.0 | 0.0 | 14.3 | 1.0 | 0.0 | 0.0 | 8.0 | 4.0 | 22.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 9.0 | 6.0 | 1.0 | 0.0 | 1.0 | 3.0 | 1.0 | 2.0 | 5.0 | 3.0 | 2.0 | 0.0 | 3.0 | 5.0 | 1.0 | 6.50 | 1.871802 | 34.40 | 3.566712 | 1.0 | 29.0 | 3.401197 | 161.05 | 5.087905 | 1.0 | 29.50 | 3.417727 | 126.1 | 4.844974 | 1.0 | 14.25 | 2.724580 | 60.0 | 4.110874 | 0.0 | 0.00 | 0.000000 | 0.00 | 0.000000 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 13.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 5.007029 | 2.0 | 0.0 | 5.0 | 9.0 |
| 1 | 5.0 | 5.0 | 0.0 | 22.0 | 2.0 | 17.0 | 4.0 | 2.0 | 0.0 | 0.0 | 1.0 | 0.0 | 15.0 | 2.708050 | 1.0 | 18.6 | 1.222020 | 0.798417 | 1.567980 | 0.943120 | 1.0 | 1.0 | 0.0 | -1.0 | -1.0 | 2.0 | 6.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 1.0 | 3.0 | 2.0 | 1.0 | 2.0 | 1.0 | 1.0 | 6.8 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 29.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 9.0 | 4.0 | 1.0 | 0.0 | 0.0 | 2.0 | 4.0 | 2.0 | 4.0 | 1.0 | 2.0 | 1.0 | 1.0 | 39.0 | 0.0 | 8.90 | 2.186051 | 330.60 | 5.803929 | 0.0 | 0.0 | 0.000000 | 0.00 | 0.000000 | 1.0 | 54.85 | 4.022670 | 1975.0 | 7.588830 | 1.0 | 16.00 | 2.833213 | 610.0 | 6.415097 | 1.0 | 45.65 | 3.842673 | 1683.55 | 7.429254 | 1.0 | 1.0 | 1.0 | 4.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 18.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 4.350794 | 4.0 | 0.0 | 8.0 | 7.0 |
| 2 | 3.0 | 4.0 | 1.0 | 67.0 | 6.0 | 14.0 | 2.0 | 2.0 | 0.0 | 16.0 | 5.0 | 0.0 | 35.0 | 3.555348 | 2.0 | 9.9 | 0.928620 | 0.656805 | 2.536380 | 1.263104 | 0.0 | 4.0 | 1.0 | 13.0 | 2.0 | 3.0 | 3.0 | 2.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 30.0 | 5.0 | 3.0 | 1.0 | 1.0 | 18.8 | 1.0 | 0.0 | 1.0 | 4.0 | 3.0 | 24.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 5.0 | 1.0 | 0.0 | 0.0 | 2.0 | 1.0 | 5.0 | 4.0 | 1.0 | 5.0 | 0.0 | 3.0 | 65.0 | 0.0 | 28.40 | 3.346389 | 1858.35 | 7.527982 | 0.0 | 0.0 | 0.000000 | 0.00 | 0.000000 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.000000 | 1.0 | 23.00 | 3.178054 | 1410.0 | 7.252054 | 0.0 | 0.00 | 0.000000 | 0.00 | 0.000000 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 21.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 5.886021 | 7.0 | 0.0 | 60.0 | 16.0 |
| 3 | 4.0 | 3.0 | 0.0 | 23.0 | 2.0 | 16.0 | 3.0 | 2.0 | 0.0 | 0.0 | 1.0 | 0.0 | 20.0 | 2.995732 | 1.0 | 5.7 | 0.033160 | 0.032622 | 1.117200 | 0.750094 | 1.0 | 2.0 | 1.0 | 18.0 | 4.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 3.0 | 3.0 | 2.0 | 3.0 | 1.0 | 1.0 | 8.7 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 38.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 3.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1.0 | 2.0 | 3.0 | 2.0 | 2.0 | 1.0 | 1.0 | 36.0 | 0.0 | 6.00 | 1.791759 | 199.45 | 5.300565 | 0.0 | 0.0 | 0.000000 | 0.00 | 0.000000 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.000000 | 1.0 | 21.00 | 3.091042 | 685.0 | 6.530878 | 0.0 | 0.00 | 0.000000 | 0.00 | 0.000000 | 1.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 26.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 5.884464 | 8.0 | 0.0 | 10.0 | 18.0 |
| 4 | 2.0 | 2.0 | 0.0 | 26.0 | 3.0 | 16.0 | 3.0 | 2.0 | 0.0 | 1.0 | 1.0 | 0.0 | 23.0 | 3.135494 | 1.0 | 1.7 | 0.214659 | 0.194463 | 0.176341 | 0.162409 | 0.0 | 1.0 | 1.0 | 13.0 | 2.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 3.0 | 2.0 | 1.0 | 0.0 | 1.0 | 10.6 | 1.0 | 0.0 | 1.0 | 6.0 | 3.0 | 32.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 9.0 | 4.0 | 0.0 | 0.0 | 0.0 | 4.0 | 2.0 | 3.0 | 1.0 | 3.0 | 3.0 | 1.0 | 3.0 | 21.0 | 0.0 | 3.05 | 1.115142 | 74.10 | 4.318821 | 1.0 | 16.5 | 2.862201 | 387.70 | 5.962808 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.000000 | 1.0 | 17.25 | 2.904165 | 360.0 | 5.888878 | 1.0 | 19.05 | 2.998229 | 410.80 | 6.020538 | 0.0 | 1.0 | 0.0 | 3.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 27.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 6.230147 | 3.0 | 0.0 | 17.0 | 15.0 |
def func_corr(dataset, threshold=0.9, remove_negative_corr=False):
    """Return the columns that are highly correlated with an earlier column.

    The pairwise correlation matrix of ``dataset`` is computed and only its
    strict upper triangle is inspected, so for every highly correlated pair
    just the later column (in ``dataset`` column order) is reported.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data whose columns are compared via ``DataFrame.corr()``.
    threshold : float, default 0.9
        A column is reported when any correlation with a preceding column is
        strictly greater than this value.
    remove_negative_corr : bool, default False
        When True, correlations are compared on absolute value, so strong
        negative correlations are reported as well.

    Returns
    -------
    list
        Column labels whose correlation with some earlier column exceeds
        ``threshold``.
    """
    matrix_corr = dataset.corr()
    # Optionally fold negative correlations onto the positive side.
    if remove_negative_corr:
        matrix_corr = np.abs(matrix_corr)
    # Boolean mask of the strict upper triangle (k=1 skips the diagonal).
    # The builtin ``bool`` is used because the ``np.bool`` alias no longer
    # exists in current NumPy releases.
    upper_mask = np.triu(np.ones(matrix_corr.shape, dtype=bool), k=1)
    upper_tri = matrix_corr.where(upper_mask)
    # NaN entries (the masked-out cells) compare False, so they never match.
    col_highcorr = [
        column for column in upper_tri.columns
        if (upper_tri[column] > threshold).any()
    ]
    return col_highcorr
# Identify predictors whose absolute correlation with an earlier predictor
# exceeds 0.9 (the target is excluded from the check) and remove them.
high_corr_list = func_corr(
    dataset.drop(columns='total_spent'),
    threshold=0.9,
    remove_negative_corr=True,
)
dataset.drop(columns=high_corr_list, inplace=True)

# Build a patsy formula regressing total_spent on every remaining column.
all_columns = "+".join(dataset.columns.difference(['total_spent']))
formula = 'total_spent~' + all_columns

# Fit ordinary least squares and show the summary table.
lm = smf.ols(formula=formula, data=dataset).fit()
lm.summary()
| Dep. Variable: | total_spent | R-squared: | 0.643 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.636 |
| Method: | Least Squares | F-statistic: | 88.20 |
| Date: | Tue, 12 Mar 2019 | Prob (F-statistic): | 0.00 |
| Time: | 19:51:54 | Log-Likelihood: | -2325.4 |
| No. Observations: | 5000 | AIC: | 4853. |
| Df Residuals: | 4899 | BIC: | 5511. |
| Df Model: | 100 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 4.2510 | 0.108 | 39.522 | 0.000 | 4.040 | 4.462 |
| active | 0.0049 | 0.012 | 0.423 | 0.672 | -0.018 | 0.028 |
| address | -0.0008 | 0.001 | -0.903 | 0.366 | -0.003 | 0.001 |
| age | -0.0007 | 0.001 | -0.872 | 0.383 | -0.002 | 0.001 |
| bfast | 0.0039 | 0.007 | 0.554 | 0.579 | -0.010 | 0.018 |
| callcard | -0.0226 | 0.020 | -1.110 | 0.267 | -0.063 | 0.017 |
| callid | 0.0190 | 0.017 | 1.150 | 0.250 | -0.013 | 0.051 |
| callwait | -0.0064 | 0.016 | -0.389 | 0.697 | -0.038 | 0.026 |
| carbought | -0.0014 | 0.012 | -0.112 | 0.911 | -0.026 | 0.023 |
| carbuy | 0.0147 | 0.012 | 1.226 | 0.220 | -0.009 | 0.038 |
| carcatvalue | -0.0018 | 0.015 | -0.116 | 0.908 | -0.031 | 0.028 |
| card | -0.1281 | 0.005 | -25.088 | 0.000 | -0.138 | -0.118 |
| card2 | -0.0681 | 0.005 | -13.164 | 0.000 | -0.078 | -0.058 |
| card2type | 0.0062 | 0.005 | 1.243 | 0.214 | -0.004 | 0.016 |
| cardmon | -0.0016 | 0.001 | -1.328 | 0.184 | -0.004 | 0.001 |
| cardten | 4.296e-05 | 2.27e-05 | 1.892 | 0.059 | -1.55e-06 | 8.75e-05 |
| cardtenurecat | -0.0056 | 0.009 | -0.598 | 0.550 | -0.024 | 0.013 |
| cardtype | 0.0053 | 0.005 | 1.061 | 0.289 | -0.004 | 0.015 |
| carown | 0.0208 | 0.014 | 1.434 | 0.152 | -0.008 | 0.049 |
| cars | 0.0061 | 0.006 | 0.964 | 0.335 | -0.006 | 0.018 |
| cartype | -0.0148 | 0.011 | -1.318 | 0.188 | -0.037 | 0.007 |
| carvalue | -0.0008 | 0.001 | -0.938 | 0.348 | -0.003 | 0.001 |
| churn | 0.0246 | 0.015 | 1.658 | 0.097 | -0.005 | 0.054 |
| commute | 0.0034 | 0.003 | 1.116 | 0.264 | -0.003 | 0.009 |
| commutebike | -0.0023 | 0.017 | -0.136 | 0.892 | -0.036 | 0.031 |
| commutebus | -0.0092 | 0.012 | -0.799 | 0.425 | -0.032 | 0.013 |
| commutecar | 0.0109 | 0.018 | 0.598 | 0.550 | -0.025 | 0.047 |
| commutecarpool | 0.0085 | 0.013 | 0.672 | 0.501 | -0.016 | 0.033 |
| commutemotorcycle | -0.0033 | 0.018 | -0.182 | 0.856 | -0.039 | 0.033 |
| commutenonmotor | -0.0279 | 0.025 | -1.126 | 0.260 | -0.077 | 0.021 |
| commutepublic | -0.0023 | 0.019 | -0.120 | 0.904 | -0.040 | 0.035 |
| commuterail | -0.0205 | 0.013 | -1.630 | 0.103 | -0.045 | 0.004 |
| commutetime | -3.991e-05 | 0.001 | -0.036 | 0.971 | -0.002 | 0.002 |
| commutewalk | -0.0299 | 0.012 | -2.424 | 0.015 | -0.054 | -0.006 |
| confer | 0.0012 | 0.017 | 0.075 | 0.941 | -0.032 | 0.034 |
| creddebt | 0.0038 | 0.004 | 0.990 | 0.322 | -0.004 | 0.011 |
| debtinc | -0.0008 | 0.002 | -0.501 | 0.616 | -0.004 | 0.002 |
| default | 0.0066 | 0.016 | 0.412 | 0.681 | -0.025 | 0.038 |
| ebill | 0.0107 | 0.016 | 0.676 | 0.499 | -0.020 | 0.042 |
| ed | -0.0055 | 0.002 | -2.401 | 0.016 | -0.010 | -0.001 |
| employ | 0.0004 | 0.001 | 0.312 | 0.755 | -0.002 | 0.003 |
| equip | -0.0291 | 0.024 | -1.218 | 0.223 | -0.076 | 0.018 |
| equipten | 2.583e-05 | 1.33e-05 | 1.949 | 0.051 | -1.57e-07 | 5.18e-05 |
| forward | -0.0007 | 0.016 | -0.045 | 0.964 | -0.033 | 0.031 |
| gender | -0.0549 | 0.011 | -4.916 | 0.000 | -0.077 | -0.033 |
| homeown | 0.0022 | 0.012 | 0.178 | 0.859 | -0.022 | 0.026 |
| hometype | 0.0064 | 0.006 | 1.041 | 0.298 | -0.006 | 0.018 |
| hourstv | -0.0003 | 0.001 | -0.235 | 0.814 | -0.003 | 0.002 |
| income | 0.0007 | 0.000 | 1.647 | 0.100 | -0.000 | 0.002 |
| internet | 0.0062 | 0.006 | 1.054 | 0.292 | -0.005 | 0.018 |
| jobcat | -0.0073 | 0.004 | -1.869 | 0.062 | -0.015 | 0.000 |
| jobsat | -0.0049 | 0.005 | -1.046 | 0.296 | -0.014 | 0.004 |
| lninc | 0.2817 | 0.025 | 11.474 | 0.000 | 0.234 | 0.330 |
| lnlongmon | 0.0015 | 0.022 | 0.069 | 0.945 | -0.042 | 0.045 |
| longmon | -0.0002 | 0.001 | -0.125 | 0.901 | -0.003 | 0.002 |
| marital | 0.0087 | 0.015 | 0.562 | 0.574 | -0.022 | 0.039 |
| multline | -0.0214 | 0.014 | -1.494 | 0.135 | -0.050 | 0.007 |
| news | 0.0022 | 0.014 | 0.156 | 0.876 | -0.025 | 0.030 |
| othdebt | 0.0013 | 0.003 | 0.496 | 0.620 | -0.004 | 0.007 |
| owncd | 0.0120 | 0.028 | 0.432 | 0.666 | -0.042 | 0.066 |
| owndvd | 0.0041 | 0.025 | 0.163 | 0.871 | -0.045 | 0.053 |
| ownfax | 0.0017 | 0.019 | 0.089 | 0.929 | -0.035 | 0.038 |
| owngame | -0.0162 | 0.014 | -1.185 | 0.236 | -0.043 | 0.011 |
| ownipod | -0.0108 | 0.013 | -0.805 | 0.421 | -0.037 | 0.016 |
| ownpc | 0.0221 | 0.016 | 1.418 | 0.156 | -0.008 | 0.053 |
| ownpda | 0.0170 | 0.018 | 0.949 | 0.342 | -0.018 | 0.052 |
| owntv | -0.0680 | 0.055 | -1.230 | 0.219 | -0.176 | 0.040 |
| ownvcr | 0.0063 | 0.025 | 0.246 | 0.805 | -0.043 | 0.056 |
| pager | -0.0068 | 0.019 | -0.355 | 0.723 | -0.044 | 0.031 |
| pets | 0.0090 | 0.017 | 0.525 | 0.600 | -0.025 | 0.043 |
| pets_birds | -0.0269 | 0.021 | -1.257 | 0.209 | -0.069 | 0.015 |
| pets_cats | -0.0006 | 0.019 | -0.031 | 0.975 | -0.037 | 0.036 |
| pets_dogs | -0.0091 | 0.019 | -0.478 | 0.633 | -0.046 | 0.028 |
| pets_freshfish | -0.0085 | 0.017 | -0.493 | 0.622 | -0.042 | 0.025 |
| pets_reptiles | 0.0286 | 0.028 | 1.035 | 0.301 | -0.026 | 0.083 |
| pets_saltfish | -0.0275 | 0.041 | -0.665 | 0.506 | -0.109 | 0.054 |
| pets_small | -0.0050 | 0.022 | -0.222 | 0.824 | -0.049 | 0.039 |
| polcontrib | 0.0093 | 0.013 | 0.704 | 0.481 | -0.017 | 0.035 |
| polparty | 0.0021 | 0.012 | 0.181 | 0.857 | -0.021 | 0.025 |
| polview | 0.0036 | 0.004 | 0.880 | 0.379 | -0.004 | 0.012 |
| reason | -0.0010 | 0.002 | -0.537 | 0.591 | -0.005 | 0.003 |
| region | 0.0074 | 0.004 | 1.713 | 0.087 | -0.001 | 0.016 |
| reside | 0.0002 | 0.006 | 0.028 | 0.978 | -0.011 | 0.012 |
| response_01 | -0.0202 | 0.020 | -0.992 | 0.321 | -0.060 | 0.020 |
| response_02 | -0.0025 | 0.017 | -0.150 | 0.881 | -0.035 | 0.030 |
| response_03 | 0.0426 | 0.019 | 2.303 | 0.021 | 0.006 | 0.079 |
| retire | 0.0382 | 0.029 | 1.339 | 0.181 | -0.018 | 0.094 |
| telecommute | 0.0059 | 0.014 | 0.410 | 0.682 | -0.022 | 0.034 |
| tenure | -0.0003 | 0.001 | -0.359 | 0.719 | -0.002 | 0.001 |
| tollfree | 0.0252 | 0.028 | 0.916 | 0.360 | -0.029 | 0.079 |
| tollmon | -0.0004 | 0.002 | -0.243 | 0.808 | -0.004 | 0.003 |
| tollten | -1.427e-05 | 2.16e-05 | -0.661 | 0.508 | -5.66e-05 | 2.8e-05 |
| total_benefit | -0.0026 | 0.004 | -0.747 | 0.455 | -0.010 | 0.004 |
| total_fee | -0.0053 | 0.010 | -0.523 | 0.601 | -0.025 | 0.014 |
| total_items | 0.0931 | 0.001 | 71.747 | 0.000 | 0.091 | 0.096 |
| townsize | -0.0018 | 0.005 | -0.336 | 0.737 | -0.012 | 0.008 |
| union | 0.0116 | 0.016 | 0.748 | 0.455 | -0.019 | 0.042 |
| voice | -0.0354 | 0.018 | -2.004 | 0.045 | -0.070 | -0.001 |
| vote | 0.0008 | 0.011 | 0.067 | 0.946 | -0.021 | 0.023 |
| wireless | 0.0349 | 0.025 | 1.377 | 0.169 | -0.015 | 0.085 |
| wireten | -2.64e-06 | 1.3e-05 | -0.203 | 0.839 | -2.82e-05 | 2.29e-05 |
| Omnibus: | 41.774 | Durbin-Watson: | 1.971 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 42.642 |
| Skew: | 0.226 | Prob(JB): | 5.50e-10 |
| Kurtosis: | 3.031 | Cond. No. | 3.40e+04 |
from scipy import stats
import pylab
# Probability (Q-Q) plot of the fitted model's residuals against a normal
# distribution, drawn through pylab.
stats.probplot( lm.resid, dist="norm", plot=pylab )
pylab.show()
Since the Durbin-Watson statistic in the OLS model summary is 1.971 (approximately 2), there is no evidence of autocorrelation in the residuals.
def red_vif(df, threshold):
    """Iteratively drop the predictor with the largest VIF until all are <= threshold.

    On every pass a design matrix for ``total_spent ~ <all other columns>`` is
    built with patsy, the variance inflation factor of each design column is
    computed, and the sorted table is printed. The non-intercept feature with
    the highest VIF is dropped from ``df`` if its VIF exceeds ``threshold``;
    otherwise the loop stops.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing a ``total_spent`` column plus predictor columns.
        It is modified IN PLACE: offending columns are dropped from it.
    threshold : float
        Largest VIF a predictor may have and still be kept.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object after all drops. (Previously the result of the
        recursive call was discarded, so ``None`` was returned whenever at
        least one column had been dropped.)
    """
    from statsmodels.stats.outliers_influence import variance_inflation_factor
    from patsy import dmatrices

    while True:
        predictors = df.columns.difference(['total_spent'])
        if predictors.empty:
            # Nothing left to test.
            return df

        my_formula = 'total_spent~' + "+".join(predictors)
        y1, X1 = dmatrices(my_formula, df, return_type='dataframe')

        # One VIF per design-matrix column (this includes patsy's Intercept).
        vif = pd.DataFrame()
        vif["VIF Factor"] = [
            variance_inflation_factor(X1.values, i) for i in range(X1.shape[1])
        ]
        vif["features"] = X1.columns
        x = vif.sort_values(by='VIF Factor', ascending=False)
        print(x)
        print('___________________________________________________')

        # Exclude the Intercept by label rather than assuming it is always
        # the top row of the sorted table.
        candidates = x[x['features'] != 'Intercept']
        if candidates.empty:
            return df
        variable = candidates['features'].iloc[0]
        vif_val = candidates['VIF Factor'].iloc[0]

        if vif_val > threshold:
            # NOTE(review): assumes design-column names equal df column names,
            # i.e. all predictors are numeric — confirm for categorical data.
            df.drop(variable, axis=1, inplace=True)
        else:
            return df
# `df` is another name for the same object as `dataset` (no copy is made),
# so the in-place column drops performed by red_vif also apply to `dataset`.
df=dataset
# Remove predictors until every remaining VIF is at most 5.
df1=red_vif(df,5)
VIF Factor features
0 381.890164 Intercept
69 109.034696 pets
73 89.737475 pets_freshfish
90 21.129178 tollmon
48 13.162839 income
91 12.847341 tollten
15 12.241056 cardten
21 11.958393 carvalue
88 11.592858 tenure
52 10.847066 lninc
14 9.674807 cardmon
53 9.603791 lnlongmon
10 8.831526 carcatvalue
71 7.839853 pets_cats
72 6.815870 pets_dogs
54 6.502625 longmon
3 6.493350 age
89 6.248776 tollfree
16 5.233278 cardtenurecat
100 4.843930 wireten
42 4.519828 equipten
40 4.477064 employ
41 4.240581 equip
58 4.196523 othdebt
99 4.165235 wireless
2 3.922292 address
76 3.842795 pets_small
86 3.380409 retire
70 3.235114 pets_birds
18 2.967996 carown
.. ... ...
11 1.208718 card
33 1.190850 commutewalk
45 1.132343 homeown
1 1.121467 active
29 1.118477 commutenonmotor
4 1.104022 bfast
9 1.097202 carbuy
24 1.061200 commutebike
25 1.061096 commutebus
98 1.055822 vote
87 1.053160 telecommute
79 1.051832 polview
94 1.048225 total_items
83 1.046736 response_01
31 1.041099 commuterail
85 1.040710 response_03
78 1.039945 polparty
27 1.035927 commutecarpool
77 1.034837 polcontrib
46 1.031991 hometype
44 1.028689 gender
30 1.028036 commutepublic
84 1.027646 response_02
96 1.023455 union
17 1.022700 cardtype
93 1.021483 total_fee
80 1.019906 reason
92 1.019080 total_benefit
13 1.017112 card2type
28 1.016627 commutemotorcycle
[101 rows x 2 columns]
___________________________________________________
VIF Factor features
0 381.848134 Intercept
89 21.118010 tollmon
48 13.159790 income
90 12.841805 tollten
15 12.239783 cardten
21 11.958238 carvalue
87 11.582351 tenure
52 10.844814 lninc
14 9.670008 cardmon
53 9.603091 lnlongmon
10 8.828459 carcatvalue
54 6.502429 longmon
3 6.491472 age
88 6.245834 tollfree
16 5.228844 cardtenurecat
99 4.843027 wireten
42 4.519793 equipten
40 4.474640 employ
41 4.240418 equip
58 4.196419 othdebt
98 4.165234 wireless
2 3.921904 address
85 3.379785 retire
18 2.966871 carown
36 2.911247 debtinc
5 2.794187 callcard
35 2.698586 creddebt
49 2.404165 internet
26 2.391187 commutecar
34 2.298401 confer
.. ... ...
24 1.061160 commutebike
25 1.061096 commutebus
97 1.055809 vote
86 1.053068 telecommute
78 1.051466 polview
93 1.047988 total_items
82 1.046578 response_01
31 1.040766 commuterail
84 1.040678 response_03
77 1.039894 polparty
27 1.035866 commutecarpool
76 1.034742 polcontrib
46 1.031825 hometype
44 1.028683 gender
30 1.027900 commutepublic
83 1.027637 response_02
95 1.023368 union
72 1.022788 pets_freshfish
17 1.022652 cardtype
92 1.021208 total_fee
75 1.020962 pets_small
73 1.020343 pets_reptiles
79 1.019843 reason
69 1.019054 pets_birds
91 1.018940 total_benefit
71 1.018136 pets_dogs
13 1.017000 card2type
28 1.016621 commutemotorcycle
70 1.015693 pets_cats
74 1.014080 pets_saltfish
[100 rows x 2 columns]
___________________________________________________
VIF Factor features
0 381.560735 Intercept
48 13.159764 income
21 11.949081 carvalue
15 11.911837 cardten
87 11.242386 tenure
52 10.833617 lninc
53 9.560776 lnlongmon
14 9.287163 cardmon
10 8.827950 carcatvalue
54 6.500626 longmon
3 6.491222 age
16 5.227206 cardtenurecat
98 4.788796 wireten
42 4.502900 equipten
40 4.473027 employ
41 4.221810 equip
58 4.194799 othdebt
97 4.102494 wireless
2 3.920155 address
89 3.829312 tollten
88 3.464667 tollfree
85 3.379445 retire
18 2.966828 carown
36 2.910731 debtinc
5 2.789365 callcard
35 2.698366 creddebt
49 2.395776 internet
26 2.391178 commutecar
34 2.295424 confer
23 2.287933 commute
.. ... ...
24 1.061018 commutebike
25 1.060915 commutebus
96 1.055716 vote
86 1.053038 telecommute
78 1.051237 polview
92 1.047921 total_items
82 1.046060 response_01
31 1.040765 commuterail
84 1.040506 response_03
77 1.039894 polparty
27 1.035147 commutecarpool
76 1.034680 polcontrib
46 1.031824 hometype
44 1.028349 gender
30 1.027833 commutepublic
83 1.026961 response_02
94 1.023146 union
72 1.022657 pets_freshfish
17 1.022648 cardtype
75 1.020658 pets_small
73 1.020331 pets_reptiles
91 1.020269 total_fee
79 1.019842 reason
90 1.018940 total_benefit
69 1.018938 pets_birds
71 1.018023 pets_dogs
13 1.016910 card2type
28 1.016621 commutemotorcycle
70 1.015305 pets_cats
74 1.013976 pets_saltfish
[99 rows x 2 columns]
___________________________________________________
VIF Factor features
0 362.752847 Intercept
15 11.911285 cardten
86 11.242081 tenure
52 9.535391 lnlongmon
14 9.286762 cardmon
21 8.913685 carvalue
10 7.882204 carcatvalue
51 7.477391 lninc
53 6.496170 longmon
3 6.489522 age
16 5.209944 cardtenurecat
97 4.786444 wireten
42 4.502143 equipten
40 4.449988 employ
41 4.219517 equip
96 4.102044 wireless
2 3.915239 address
88 3.825739 tollten
57 3.617903 othdebt
87 3.463258 tollfree
84 3.339436 retire
18 2.962042 carown
5 2.789075 callcard
36 2.492769 debtinc
35 2.439088 creddebt
48 2.395112 internet
26 2.390690 commutecar
34 2.295319 confer
23 2.287786 commute
6 2.247806 callid
.. ... ...
25 1.060871 commutebus
24 1.060815 commutebike
95 1.055711 vote
85 1.052995 telecommute
77 1.051234 polview
91 1.047412 total_items
81 1.045669 response_01
31 1.040676 commuterail
83 1.040064 response_03
76 1.039854 polparty
27 1.035146 commutecarpool
75 1.034674 polcontrib
46 1.031614 hometype
44 1.027970 gender
30 1.027494 commutepublic
82 1.026911 response_02
93 1.023108 union
71 1.022656 pets_freshfish
17 1.022642 cardtype
72 1.020319 pets_reptiles
90 1.020268 total_fee
78 1.019825 reason
74 1.019199 pets_small
68 1.018904 pets_birds
89 1.018847 total_benefit
70 1.017574 pets_dogs
13 1.016910 card2type
28 1.016611 commutemotorcycle
69 1.015305 pets_cats
73 1.013872 pets_saltfish
[98 rows x 2 columns]
___________________________________________________
VIF Factor features
0 362.683196 Intercept
51 9.445282 lnlongmon
85 9.095449 tenure
20 8.912847 carvalue
10 7.880999 carcatvalue
50 7.476901 lninc
3 6.484650 age
52 6.300231 longmon
15 5.168861 cardtenurecat
96 4.754639 wireten
41 4.499635 equipten
39 4.443616 employ
40 4.218969 equip
95 4.086584 wireless
2 3.903717 address
87 3.712565 tollten
56 3.617166 othdebt
86 3.416744 tollfree
83 3.339402 retire
17 2.962032 carown
35 2.492382 debtinc
34 2.438662 creddebt
5 2.398684 callcard
47 2.394871 internet
25 2.390258 commutecar
33 2.289576 confer
22 2.287713 commute
6 2.243823 callid
66 2.210925 pager
18 2.204148 cars
.. ... ...
23 1.060803 commutebike
24 1.060734 commutebus
94 1.055667 vote
84 1.052993 telecommute
76 1.050746 polview
90 1.047408 total_items
80 1.045627 response_01
30 1.040602 commuterail
82 1.040007 response_03
75 1.039796 polparty
26 1.035144 commutecarpool
74 1.034582 polcontrib
45 1.031526 hometype
43 1.027928 gender
29 1.027488 commutepublic
81 1.026187 response_02
92 1.022755 union
16 1.022639 cardtype
70 1.022252 pets_freshfish
71 1.020227 pets_reptiles
89 1.019733 total_fee
77 1.019600 reason
73 1.019195 pets_small
88 1.018562 total_benefit
67 1.018351 pets_birds
69 1.017376 pets_dogs
27 1.016599 commutemotorcycle
13 1.016582 card2type
68 1.015300 pets_cats
72 1.013858 pets_saltfish
[97 rows x 2 columns]
___________________________________________________
VIF Factor features
0 343.538310 Intercept
20 8.904993 carvalue
84 8.388011 tenure
10 7.880981 carcatvalue
50 7.452453 lninc
3 6.482355 age
15 4.983482 cardtenurecat
95 4.753604 wireten
41 4.499634 equipten
39 4.431033 employ
40 4.217920 equip
94 4.086559 wireless
2 3.886818 address
86 3.706088 tollten
55 3.615713 othdebt
85 3.412949 tollfree
82 3.339401 retire
17 2.961793 carown
35 2.491104 debtinc
51 2.446215 longmon
34 2.438655 creddebt
5 2.395999 callcard
47 2.394866 internet
25 2.389344 commutecar
33 2.289472 confer
22 2.286796 commute
6 2.243429 callid
65 2.208809 pager
18 2.204133 cars
7 2.198748 callwait
.. ... ...
24 1.060664 commutebus
23 1.060100 commutebike
93 1.055623 vote
83 1.052941 telecommute
75 1.050673 polview
89 1.047286 total_items
79 1.045616 response_01
30 1.040466 commuterail
81 1.039967 response_03
74 1.039422 polparty
26 1.035006 commutecarpool
73 1.034460 polcontrib
45 1.031152 hometype
43 1.027920 gender
29 1.027477 commutepublic
80 1.026176 response_02
91 1.022735 union
16 1.022418 cardtype
69 1.022105 pets_freshfish
70 1.020054 pets_reptiles
76 1.019529 reason
72 1.019140 pets_small
88 1.018563 total_fee
87 1.018446 total_benefit
66 1.017922 pets_birds
68 1.017121 pets_dogs
13 1.016551 card2type
27 1.015972 commutemotorcycle
67 1.015280 pets_cats
71 1.013324 pets_saltfish
[96 rows x 2 columns]
___________________________________________________
VIF Factor features
0 333.416868 Intercept
83 8.383935 tenure
3 6.476096 age
49 6.424281 lninc
15 4.979484 cardtenurecat
94 4.753296 wireten
10 4.528285 carcatvalue
40 4.498865 equipten
38 4.426303 employ
39 4.216884 equip
93 4.086516 wireless
2 3.886815 address
85 3.705084 tollten
84 3.412845 tollfree
54 3.351497 othdebt
81 3.330207 retire
17 2.942646 carown
50 2.445762 longmon
5 2.395376 callcard
46 2.394220 internet
24 2.389064 commutecar
33 2.381423 creddebt
34 2.340231 debtinc
32 2.289471 confer
21 2.286406 commute
6 2.242663 callid
64 2.208792 pager
18 2.202222 cars
7 2.198678 callwait
41 2.179687 forward
.. ... ...
23 1.060604 commutebus
22 1.060071 commutebike
92 1.055607 vote
82 1.052886 telecommute
74 1.050240 polview
88 1.046947 total_items
78 1.045339 response_01
29 1.040233 commuterail
80 1.039904 response_03
73 1.039385 polparty
25 1.034589 commutecarpool
72 1.034460 polcontrib
44 1.030678 hometype
42 1.027846 gender
28 1.027456 commutepublic
79 1.025961 response_02
90 1.022577 union
16 1.022415 cardtype
68 1.022095 pets_freshfish
69 1.019752 pets_reptiles
75 1.019529 reason
71 1.019139 pets_small
87 1.018460 total_fee
86 1.018256 total_benefit
65 1.017877 pets_birds
67 1.017116 pets_dogs
13 1.016035 card2type
26 1.015968 commutemotorcycle
66 1.015021 pets_cats
70 1.013279 pets_saltfish
[95 rows x 2 columns]
___________________________________________________
VIF Factor features
0 332.952581 Intercept
3 6.424296 age
49 6.391762 lninc
93 4.752921 wireten
10 4.527701 carcatvalue
38 4.395970 employ
40 4.316854 equipten
39 4.143236 equip
92 4.084311 wireless
2 3.857490 address
84 3.397236 tollten
54 3.343901 othdebt
81 3.324311 retire
83 3.301452 tollfree
15 3.143799 cardtenurecat
17 2.942448 carown
46 2.394058 internet
24 2.388850 commutecar
33 2.381401 creddebt
34 2.337089 debtinc
5 2.302263 callcard
32 2.286645 confer
21 2.286404 commute
6 2.241894 callid
64 2.206239 pager
18 2.200890 cars
7 2.198137 callwait
41 2.178189 forward
90 2.170290 voice
50 2.155981 longmon
.. ... ...
23 1.060530 commutebus
22 1.059860 commutebike
91 1.055567 vote
82 1.051811 telecommute
74 1.050022 polview
87 1.046694 total_items
78 1.045238 response_01
29 1.039519 commuterail
80 1.039412 response_03
73 1.039257 polparty
25 1.034434 commutecarpool
72 1.033647 polcontrib
44 1.030382 hometype
28 1.027435 commutepublic
42 1.027397 gender
79 1.025961 response_02
16 1.022350 cardtype
89 1.022085 union
68 1.021603 pets_freshfish
69 1.019750 pets_reptiles
75 1.019511 reason
71 1.019113 pets_small
85 1.018142 total_benefit
86 1.018063 total_fee
65 1.017636 pets_birds
67 1.017048 pets_dogs
26 1.015968 commutemotorcycle
13 1.015825 card2type
66 1.014703 pets_cats
70 1.012487 pets_saltfish
[94 rows x 2 columns]
___________________________________________________
VIF Factor features
0 311.315446 Intercept
48 6.385895 lninc
92 4.751677 wireten
9 4.519943 carcatvalue
39 4.316842 equipten
38 4.142823 equip
91 4.083960 wireless
37 4.003984 employ
83 3.393964 tollten
53 3.342343 othdebt
82 3.297999 tollfree
80 3.044390 retire
16 2.936182 carown
14 2.914777 cardtenurecat
2 2.608239 address
45 2.393930 internet
23 2.387082 commutecar
32 2.380715 creddebt
33 2.330951 debtinc
4 2.299327 callcard
31 2.286618 confer
20 2.286342 commute
5 2.241264 callid
63 2.205347 pager
17 2.200691 cars
6 2.196987 callwait
40 2.178180 forward
89 2.169827 voice
49 2.137035 longmon
13 2.089555 cardmon
.. ... ...
22 1.060400 commutebus
21 1.059844 commutebike
90 1.055542 vote
81 1.051773 telecommute
73 1.050005 polview
86 1.046104 total_items
77 1.045176 response_01
28 1.039482 commuterail
72 1.039217 polparty
79 1.039201 response_03
24 1.034432 commutecarpool
71 1.033597 polcontrib
43 1.029809 hometype
27 1.027432 commutepublic
41 1.027325 gender
78 1.025960 response_02
15 1.022153 cardtype
67 1.021589 pets_freshfish
88 1.021389 union
68 1.019599 pets_reptiles
74 1.019471 reason
70 1.018979 pets_small
84 1.018131 total_benefit
85 1.018036 total_fee
66 1.017048 pets_dogs
64 1.016748 pets_birds
25 1.015838 commutemotorcycle
12 1.015756 card2type
65 1.014458 pets_cats
69 1.012392 pets_saltfish
[93 rows x 2 columns]
___________________________________________________
VIF Factor features
0 245.723081 Intercept
91 4.751653 wireten
39 4.315460 equipten
38 4.140348 equip
90 4.083953 wireless
9 3.715991 carcatvalue
37 3.654167 employ
82 3.393926 tollten
81 3.292507 tollfree
16 2.899215 carown
14 2.895551 cardtenurecat
52 2.632544 othdebt
2 2.601459 address
45 2.393641 internet
23 2.384460 commutecar
4 2.297900 callcard
31 2.286346 confer
20 2.286278 commute
5 2.240905 callid
62 2.205293 pager
79 2.204983 retire
6 2.196876 callwait
17 2.193500 cars
40 2.178123 forward
88 2.169825 voice
48 2.132329 longmon
32 2.128815 creddebt
13 2.088963 cardmon
75 2.048320 reside
49 1.932509 marital
.. ... ...
22 1.060297 commutebus
21 1.059840 commutebike
89 1.052116 vote
80 1.051712 telecommute
72 1.047339 polview
76 1.045094 response_01
85 1.042378 total_items
28 1.039474 commuterail
71 1.039217 polparty
78 1.038725 response_03
24 1.033565 commutecarpool
70 1.033000 polcontrib
43 1.028551 hometype
27 1.027422 commutepublic
41 1.026885 gender
77 1.025959 response_02
15 1.022111 cardtype
66 1.021276 pets_freshfish
87 1.020925 union
67 1.019407 pets_reptiles
73 1.019004 reason
69 1.018683 pets_small
83 1.018129 total_benefit
84 1.018021 total_fee
65 1.017034 pets_dogs
63 1.016727 pets_birds
12 1.015755 card2type
25 1.015321 commutemotorcycle
64 1.014420 pets_cats
68 1.012375 pets_saltfish
[92 rows x 2 columns]
___________________________________________________
# Refit the OLS model on the columns that remain after the VIF screening.
dataset = df

# Formula: total_spent regressed on every other column still present.
all_columns = "+".join(df.columns.difference(['total_spent']))
my_formula = "total_spent~" + all_columns

lm = smf.ols(formula=my_formula, data=dataset).fit()
lm.summary()
| Dep. Variable: | total_spent | R-squared: | 0.624 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.617 |
| Method: | Least Squares | F-statistic: | 89.35 |
| Date: | Tue, 12 Mar 2019 | Prob (F-statistic): | 0.00 |
| Time: | 19:53:48 | Log-Likelihood: | -2457.0 |
| No. Observations: | 5000 | AIC: | 5098. |
| Df Residuals: | 4908 | BIC: | 5698. |
| Df Model: | 91 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 4.9001 | 0.089 | 55.368 | 0.000 | 4.727 | 5.074 |
| active | 0.0059 | 0.012 | 0.497 | 0.619 | -0.017 | 0.029 |
| address | -0.0005 | 0.001 | -0.707 | 0.479 | -0.002 | 0.001 |
| bfast | 0.0069 | 0.007 | 0.962 | 0.336 | -0.007 | 0.021 |
| callcard | -0.0426 | 0.019 | -2.246 | 0.025 | -0.080 | -0.005 |
| callid | 0.0213 | 0.017 | 1.259 | 0.208 | -0.012 | 0.054 |
| callwait | -0.0092 | 0.017 | -0.547 | 0.584 | -0.042 | 0.024 |
| carbought | -0.0255 | 0.013 | -2.034 | 0.042 | -0.050 | -0.001 |
| carbuy | 0.0040 | 0.012 | 0.326 | 0.745 | -0.020 | 0.028 |
| carcatvalue | 0.0616 | 0.010 | 6.118 | 0.000 | 0.042 | 0.081 |
| card | -0.1264 | 0.005 | -24.150 | 0.000 | -0.137 | -0.116 |
| card2 | -0.0696 | 0.005 | -13.123 | 0.000 | -0.080 | -0.059 |
| card2type | 0.0060 | 0.005 | 1.189 | 0.234 | -0.004 | 0.016 |
| cardmon | 0.0006 | 0.001 | 0.971 | 0.331 | -0.001 | 0.002 |
| cardtenurecat | 0.0022 | 0.007 | 0.312 | 0.755 | -0.012 | 0.016 |
| cardtype | 0.0047 | 0.005 | 0.919 | 0.358 | -0.005 | 0.015 |
| carown | -0.0055 | 0.015 | -0.378 | 0.706 | -0.034 | 0.023 |
| cars | 0.0005 | 0.006 | 0.082 | 0.935 | -0.012 | 0.013 |
| cartype | -0.0380 | 0.011 | -3.330 | 0.001 | -0.060 | -0.016 |
| churn | 0.0308 | 0.015 | 2.032 | 0.042 | 0.001 | 0.060 |
| commute | 0.0032 | 0.003 | 1.014 | 0.311 | -0.003 | 0.009 |
| commutebike | -0.0035 | 0.018 | -0.199 | 0.843 | -0.038 | 0.031 |
| commutebus | -0.0073 | 0.012 | -0.616 | 0.538 | -0.030 | 0.016 |
| commutecar | 0.0017 | 0.019 | 0.090 | 0.928 | -0.035 | 0.038 |
| commutecarpool | 0.0027 | 0.013 | 0.206 | 0.837 | -0.023 | 0.028 |
| commutemotorcycle | 0.0034 | 0.019 | 0.181 | 0.856 | -0.033 | 0.040 |
| commutenonmotor | -0.0411 | 0.025 | -1.614 | 0.106 | -0.091 | 0.009 |
| commutepublic | -0.0010 | 0.019 | -0.050 | 0.960 | -0.039 | 0.037 |
| commuterail | -0.0193 | 0.013 | -1.494 | 0.135 | -0.045 | 0.006 |
| commutetime | -0.0003 | 0.001 | -0.302 | 0.763 | -0.003 | 0.002 |
| commutewalk | -0.0271 | 0.013 | -2.140 | 0.032 | -0.052 | -0.002 |
| confer | 0.0016 | 0.017 | 0.093 | 0.926 | -0.032 | 0.035 |
| creddebt | 0.0241 | 0.003 | 6.955 | 0.000 | 0.017 | 0.031 |
| debtinc | -0.0109 | 0.001 | -8.709 | 0.000 | -0.013 | -0.008 |
| default | 0.0030 | 0.016 | 0.187 | 0.852 | -0.029 | 0.035 |
| ebill | 0.0129 | 0.016 | 0.794 | 0.427 | -0.019 | 0.045 |
| ed | 0.0017 | 0.002 | 0.729 | 0.466 | -0.003 | 0.006 |
| employ | 0.0057 | 0.001 | 5.104 | 0.000 | 0.004 | 0.008 |
| equip | -0.0220 | 0.024 | -0.909 | 0.363 | -0.070 | 0.025 |
| equipten | 2.326e-05 | 1.33e-05 | 1.751 | 0.080 | -2.78e-06 | 4.93e-05 |
| forward | -0.0009 | 0.017 | -0.055 | 0.956 | -0.034 | 0.032 |
| gender | -0.0506 | 0.011 | -4.422 | 0.000 | -0.073 | -0.028 |
| homeown | 0.0087 | 0.012 | 0.705 | 0.481 | -0.015 | 0.033 |
| hometype | 0.0029 | 0.006 | 0.472 | 0.637 | -0.009 | 0.015 |
| hourstv | -0.0004 | 0.001 | -0.337 | 0.736 | -0.003 | 0.002 |
| internet | 0.0052 | 0.006 | 0.866 | 0.386 | -0.007 | 0.017 |
| jobcat | -0.0109 | 0.004 | -2.873 | 0.004 | -0.018 | -0.003 |
| jobsat | -0.0012 | 0.005 | -0.256 | 0.798 | -0.011 | 0.008 |
| longmon | -0.0001 | 0.001 | -0.203 | 0.839 | -0.002 | 0.001 |
| marital | 0.0109 | 0.016 | 0.692 | 0.489 | -0.020 | 0.042 |
| multline | -0.0316 | 0.014 | -2.191 | 0.029 | -0.060 | -0.003 |
| news | -0.0037 | 0.014 | -0.266 | 0.790 | -0.031 | 0.024 |
| othdebt | 0.0205 | 0.002 | 9.462 | 0.000 | 0.016 | 0.025 |
| owncd | 0.0555 | 0.028 | 1.972 | 0.049 | 0.000 | 0.111 |
| owndvd | 0.0585 | 0.025 | 2.327 | 0.020 | 0.009 | 0.108 |
| ownfax | 0.0021 | 0.019 | 0.110 | 0.913 | -0.035 | 0.039 |
| owngame | -0.0133 | 0.014 | -0.954 | 0.340 | -0.041 | 0.014 |
| ownipod | -0.0037 | 0.014 | -0.270 | 0.787 | -0.031 | 0.023 |
| ownpc | 0.0166 | 0.016 | 1.036 | 0.300 | -0.015 | 0.048 |
| ownpda | 0.0262 | 0.018 | 1.432 | 0.152 | -0.010 | 0.062 |
| owntv | -0.0509 | 0.056 | -0.901 | 0.367 | -0.162 | 0.060 |
| ownvcr | 0.0504 | 0.026 | 1.972 | 0.049 | 0.000 | 0.100 |
| pager | -0.0108 | 0.020 | -0.552 | 0.581 | -0.049 | 0.028 |
| pets_birds | -0.0158 | 0.012 | -1.286 | 0.198 | -0.040 | 0.008 |
| pets_cats | 0.0080 | 0.007 | 1.153 | 0.249 | -0.006 | 0.022 |
| pets_dogs | 0.0002 | 0.008 | 0.028 | 0.978 | -0.015 | 0.015 |
| pets_freshfish | -0.0001 | 0.002 | -0.068 | 0.946 | -0.004 | 0.004 |
| pets_reptiles | 0.0430 | 0.021 | 2.060 | 0.039 | 0.002 | 0.084 |
| pets_saltfish | -0.0134 | 0.027 | -0.495 | 0.620 | -0.066 | 0.040 |
| pets_small | 0.0077 | 0.012 | 0.653 | 0.514 | -0.015 | 0.031 |
| polcontrib | 0.0144 | 0.013 | 1.068 | 0.286 | -0.012 | 0.041 |
| polparty | 0.0021 | 0.012 | 0.173 | 0.862 | -0.021 | 0.025 |
| polview | 0.0069 | 0.004 | 1.658 | 0.097 | -0.001 | 0.015 |
| reason | -0.0003 | 0.002 | -0.156 | 0.876 | -0.004 | 0.004 |
| region | 0.0075 | 0.004 | 1.693 | 0.090 | -0.001 | 0.016 |
| reside | -0.0024 | 0.006 | -0.401 | 0.688 | -0.014 | 0.009 |
| response_01 | -0.0242 | 0.021 | -1.162 | 0.245 | -0.065 | 0.017 |
| response_02 | -0.0013 | 0.017 | -0.075 | 0.940 | -0.035 | 0.032 |
| response_03 | 0.0494 | 0.019 | 2.603 | 0.009 | 0.012 | 0.087 |
| retire | -0.1921 | 0.024 | -8.128 | 0.000 | -0.238 | -0.146 |
| telecommute | 0.0046 | 0.015 | 0.313 | 0.754 | -0.024 | 0.034 |
| tollfree | 0.0257 | 0.021 | 1.251 | 0.211 | -0.015 | 0.066 |
| tollten | -1.228e-05 | 1.14e-05 | -1.080 | 0.280 | -3.46e-05 | 1e-05 |
| total_benefit | -0.0027 | 0.004 | -0.735 | 0.462 | -0.010 | 0.004 |
| total_fee | -0.0064 | 0.010 | -0.624 | 0.533 | -0.027 | 0.014 |
| total_items | 0.0943 | 0.001 | 71.067 | 0.000 | 0.092 | 0.097 |
| townsize | -0.0009 | 0.005 | -0.167 | 0.867 | -0.011 | 0.010 |
| union | 0.0167 | 0.016 | 1.049 | 0.294 | -0.015 | 0.048 |
| voice | -0.0359 | 0.018 | -1.981 | 0.048 | -0.071 | -0.000 |
| vote | 0.0112 | 0.012 | 0.967 | 0.333 | -0.012 | 0.034 |
| wireless | 0.0306 | 0.026 | 1.187 | 0.235 | -0.020 | 0.081 |
| wireten | -1.736e-07 | 1.32e-05 | -0.013 | 0.990 | -2.61e-05 | 2.58e-05 |
| Omnibus: | 50.803 | Durbin-Watson: | 1.968 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 52.161 |
| Skew: | 0.250 | Prob(JB): | 4.71e-12 |
| Kurtosis: | 3.022 | Cond. No. | 2.46e+04 |
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import f_regression

# Predictors and target; 20% of the rows are held out for testing.
X = dataset.drop(columns='total_spent')
y = dataset['total_spent'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Univariate F-test of each training predictor against the target.
F_values, p_values = f_regression(X_train, y_train)

# Count the predictors whose p-value is above 0.01.
li = list(p_values)
len([p for p in li if p > 0.01])
53
# Positions (within p_values) of the entries whose p-value exceeds 0.01.
drop_index = np.flatnonzero(np.asarray(p_values) > 0.01).tolist()
drop_index
[0, 1, 4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 49, 52, 53, 54, 55, 56, 61, 62, 73, 75, 81, 82, 84, 85, 86, 88, 89]
# drop_index holds positions within X_train's columns (predictors only, the
# target removed). Indexing `dataset` by those positions is wrong because
# `dataset` still contains 'total_spent': every column after the target is
# shifted by one and the target itself can be dropped. Translate the
# positions to column labels first and drop by label.
dataset.drop(columns=X_train.columns[drop_index], inplace=True)
# With the label-based drop the target is never removed, so this assignment
# just rewrites the same values; kept so later cells see the same state.
dataset['total_spent'] = y
dataset.columns
Index(['gender', 'ed', 'employ', 'retire', 'creddebt', 'othdebt', 'jobsat',
'homeown', 'address', 'carown', 'carcatvalue', 'vote', 'card',
'cardtenurecat', 'card2', 'tollfree', 'tollten', 'equip', 'equipten',
'wireless', 'wireten', 'multline', 'voice', 'pager', 'internet',
'callid', 'callwait', 'forward', 'confer', 'owntv', 'ownvcr', 'owndvd',
'owncd', 'ownpda', 'ownpc', 'ownfax', 'response_03', 'total_fee',
'total_items', 'total_spent'],
dtype='object')
# (rows, columns) of the dataset after the p-value based column drop.
dataset.shape
(5000, 40)
from sklearn.model_selection import train_test_split

# Rebuild predictors/target from the reduced dataset and redo the 80/20
# split with the same seed as before.
X = dataset.drop(columns='total_spent')
y = dataset['total_spent'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
- LinearRegression
- Lasso
- Ridge
- KNN
- SVM
- Decision Trees
- Random Forest
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
# Candidate regressors as (label, estimator) pairs, all with default
# hyper-parameters. The order determines the row index in the results table.
models = [
    ('LR', LinearRegression()),
    ('LASSO', Lasso()),
    ('RIDGE', Ridge()),
    ('KNN', KNeighborsRegressor()),
    ('RandomForest', RandomForestRegressor()),
    ('CART', DecisionTreeRegressor()),
    ('SVR', SVR()),
]
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
# Evaluate each model in turn: k-fold cross-validation on the training split
# (negative MSE) plus a single R^2 on the held-out test split.
reg_result = []   # per-fold CV scores, one array per model
model_name = []   # model labels, in evaluation order
score_mean = []   # mean of the CV scores per model
score_std = []    # standard deviation of the CV scores per model
r2_score = []     # test-set R^2 per model (a list, not sklearn's r2_score function)
num_folds = 4
scoring = 'neg_mean_squared_error'

# The splitter is identical for every model, so build it once outside the loop.
# `random_state` is deliberately not passed: without shuffle=True it has no
# effect, and scikit-learn >= 0.24 raises a ValueError for that combination.
# The folds are the same contiguous, unshuffled blocks as before.
kfold = KFold(n_splits=num_folds)

for name, model in models:
    cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)

    # Refit on the full training split and score once on the test split.
    reg = model.fit(X_train, y_train)
    reg_r = reg.predict(X_test)
    r2_score.append(metrics.r2_score(y_test, reg_r))

    reg_result.append(cv_results)
    model_name.append(name)
    score_mean.append(cv_results.mean())
    score_std.append(cv_results.std())

# Collect the per-model summaries into one table, best test R^2 first.
# NOTE(review): ranking by test-set R^2 means the hold-out data takes part in
# model selection; the CV mean is the unbiased criterion - confirm intent.
result_table = pd.DataFrame()
result_table['Model_name'] = model_name
result_table['r2_score'] = r2_score
result_table['Score_mean'] = score_mean
result_table['Score std'] = score_std
result_table.sort_values(by='r2_score', ascending=False)
| Model_name | r2_score | Score_mean | Score std | |
|---|---|---|---|---|
| 4 | RandomForest | 0.600363 | -0.154560 | 0.006614 |
| 2 | RIDGE | 0.587672 | -0.164575 | 0.012388 |
| 0 | LR | 0.587622 | -0.164587 | 0.012385 |
| 1 | LASSO | 0.340470 | -0.269052 | 0.025086 |
| 5 | CART | 0.269090 | -0.274875 | 0.012122 |
| 6 | SVR | 0.140013 | -0.337415 | 0.026968 |
| 3 | KNN | 0.024554 | -0.374530 | 0.026688 |
- We will build the final model using RandomForestRegressor, since it has the highest test-set r2_score in the comparison table above.
# Fit a default-parameter random forest on the training split. fit() returns
# the estimator itself, so `rf` is bound to the fitted model.
# NOTE(review): no random_state is set, so the scores differ from run to run.
rf = RandomForestRegressor().fit(X_train, y_train)
rf
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
max_features='auto', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
oob_score=False, random_state=None, verbose=0, warm_start=False)
# Training split: in-sample RMSE and R^2 of the fitted forest.
y_pred_train = rf.predict(X_train)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
rmse_train = np.sqrt(mse_train)
r2_train = metrics.r2_score(y_train, y_pred_train)
print(rmse_train)
print(r2_train)
0.16636658826400147 0.9337516454082173
# Test split: out-of-sample RMSE and R^2 of the fitted forest.
y_pred = rf.predict(X_test)
mse_test = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse_test)
r2_test = metrics.r2_score(y_test, y_pred)
print(rmse)
print(r2_test)
0.4063972525309021 0.5936944076380557
# Pair each predictor name with the importance the fitted forest gave it.
# NOTE: the column is labelled 'coefficients', but the values come from
# rf.feature_importances_; they are not regression coefficients.
colnames = X.columns.tolist()
colvalues = list(rf.feature_importances_)
ce = pd.DataFrame(dict(names=colnames, coefficients=colvalues))

# Repeat the hold-out metrics next to the importance table.
holdout_r2 = metrics.r2_score(y_test, y_pred)
print("RMSE ", rmse)
print("R2 metrics ", holdout_r2)
RMSE 0.4063972525309021 R2 metrics 0.5936944076380557
# Display the feature-name / importance table built above.
ce
| names | coefficients | |
|---|---|---|
| 0 | gender | 0.004379 |
| 1 | ed | 0.021743 |
| 2 | employ | 0.024177 |
| 3 | retire | 0.002944 |
| 4 | creddebt | 0.043304 |
| 5 | othdebt | 0.055941 |
| 6 | jobsat | 0.012323 |
| 7 | homeown | 0.003887 |
| 8 | address | 0.027670 |
| 9 | carown | 0.004900 |
| 10 | carcatvalue | 0.051930 |
| 11 | vote | 0.004359 |
| 12 | card | 0.087901 |
| 13 | cardtenurecat | 0.006805 |
| 14 | card2 | 0.034295 |
| 15 | tollfree | 0.001856 |
| 16 | tollten | 0.017930 |
| 17 | equip | 0.000997 |
| 18 | equipten | 0.011900 |
| 19 | wireless | 0.000836 |
| 20 | wireten | 0.008980 |
| 21 | multline | 0.004459 |
| 22 | voice | 0.002200 |
| 23 | pager | 0.002381 |
| 24 | internet | 0.008609 |
| 25 | callid | 0.003494 |
| 26 | callwait | 0.003285 |
| 27 | forward | 0.003011 |
| 28 | confer | 0.003735 |
| 29 | owntv | 0.000418 |
| 30 | ownvcr | 0.001700 |
| 31 | owndvd | 0.002245 |
| 32 | owncd | 0.001447 |
| 33 | ownpda | 0.002271 |
| 34 | ownpc | 0.003395 |
| 35 | ownfax | 0.001907 |
| 36 | response_03 | 0.003106 |
| 37 | total_fee | 0.005109 |
| 38 | total_items | 0.518171 |
# Chart the 30 most important features, largest first.
# The table must be sorted before taking head(30): in column order the first
# 30 rows leave out rows 30-38, including 'total_items', which carries the
# largest importance in the table above.
ce1 = ce.sort_values(by='coefficients', ascending=False).head(30)
# Index by feature name so the bars are labelled with names, not row numbers.
ce1.set_index('names')['coefficients'].plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x1a59a2bf438>